U
    U?h                     @   s   d dl Z d dlZd dlZd dlmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ G dd	 d	eZG d
d deZG dd deZG dd deZdS )    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypefind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                       s$   e Zd Z fddZdd Z  ZS )	QOpMatMulc                    s   t  || d S Nsuper__init__selfZonnx_quantizerZ	onnx_node	__class__ [/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/quantization/operators/matmul.pyr      s    zQOpMatMul.__init__c                 C   s   | j | js(td| jj d dS | j | jjd sj| j | jjd sjtd| jj d dS | j j	r| j 
| jjd std| j j d| jj d dS d	S )
NzIgnore MatMul ]Fr	   r   z&Ignore MatMul due to non float inputs z%Ignore MatMul due to non constant B: [T)	quantizerZshould_quantize_nodenodeloggingdebugnameZis_float_tensorinputinfoZq_matmul_const_b_onlyZfind_initializer_in_pathZgraph_scope)r   r   r   r   should_quantize   s     zQOpMatMul.should_quantize)__name__
__module____qualname__r   r    __classcell__r   r   r   r   r      s   r   c                       s$   e Zd Z fddZdd Z  ZS )MatMulIntegerc                    s   t  || d S r   r   r   r   r   r   r   )   s    zMatMulInteger.__init__c                 C   s  | j }|jdkst| j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 |jd d }
|jr|jd nd}t	j
d	|| |
g|}|| |
d
 }| jj|jd dd}t	j
jd|
g|g|
d |d}|| t|dkst|r|d n|d d |d  d }t|| jj}|d krft||d |}|| |jd }d}|r|d }|t||g|jd | | j j|7  _d S )NMatMulr   r	   TZreduce_rangeZop_level_per_channelZ_output_quantized_quant r%   Z_cast_output)	mandatoryZCastZ_cast)tor   Z_scales_mul__mulz:0Z_output_scale_mul)r   op_typeAssertionErrorr   quantize_activationquantize_weightextendoutputr   onnxhelper	make_nodeappendZget_tensor_typelenr   	new_nodesr   )r   r   quantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightZmatmul_integer_outputZmatmul_integer_nameZmatmul_integer_nodeZcast_op_outputotypeZ	cast_nodeZscales_mul_opZscales_mul_nodeZscales_mul_op_outputZoutput_scale_mul_opr   r   r   quantize,   st    








zMatMulInteger.quantizer!   r"   r#   r   rC   r$   r   r   r   r   r%   (   s   r%   c                       s(   e Zd Z fddZ fddZ  ZS )QLinearMatMulc                    s   t  || d S r   r   r   r   r   r   r   |   s    zQLinearMatMul.__init__c                    s  | j }|jdkst| j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 | j|jd \}
}}}}|
r|d krt	 
 S |jd t }|jr|jd nd}g }||d  ||d  ||d  ||d  ||d  ||d  || || | jjtjjtjjtjjtjjhkr\dnd}tjjd	||g||d
}|| t|jd |||tj}|| jj|jd < | j j|7  _d S )Nr&   r   r	   Tr'   r(   r)   zcom.microsoftrE   )domain)r   r.   r/   r   r0   r1   r2   Z_get_quantization_paramsr3   r   rC   r   r   r7   Zweight_qType
onnx_protoZTensorProtoZFLOAT8E4M3FNZFLOAT8E4M3FNUZZ
FLOAT8E5M2ZFLOAT8E5M2FNUZr4   r5   r6   r   r   ZInputZquantized_value_mapr9   )r   r   r:   r;   r<   r=   r>   r?   r@   rA   Z
data_foundZoutput_scale_nameZoutput_zp_namer,   Zqlinear_matmul_outputZqlinear_matmul_nameZqlinear_matmul_inputsrF   Zqlinear_matmul_nodeZq_outputr   r   r   rC      s    







zQLinearMatMul.quantizerD   r   r   r   r   rE   {   s   rE   c                       s$   e Zd Z fddZdd Z  ZS )	QDQMatMulc                    s   t  || d S r   r   r   r   r   r   r      s    zQDQMatMul.__init__c                 C   sx   | j }|jdkst| jr"|j}nt|j|j}|D ]<}| jj	|d|jd\}}|rf| j
|| q6| j| q6d S )Nr&   r	   )Zdefault_axisr.   )r   r.   r/   Zdisable_qdq_for_node_outputr   	itertoolschainr3   r   Zis_tensor_per_channelZ"quantize_weight_tensor_per_channelZquantize_activation_tensor)r   r   Znodes_to_iterateZtensor_nameZis_per_channelZchannel_axisr   r   r   rC      s      
zQDQMatMul.quantizerD   r   r   r   r   rH      s   rH   )rI   r   r4   r   rG   Zquant_utilsr   r   r   r   r   Zbase_operatorr
   Zqdq_base_operatorr   r   r%   rE   rH   r   r   r   r   <module>   s   SU