import logging
from typing import Any, Dict

import numpy as np
import onnx
import onnx.numpy_helper

try:
    from onnx.reference.op_run import to_array_extended
except ImportError:
    # onnx.reference is not available in older onnx releases.
    to_array_extended = None

from .calibrate import TensorData
from .onnx_model import ONNXModel
from .quant_utils import (
    ONNX_TYPE_TO_NP_TYPE,
    TENSOR_NAME_QUANT_SUFFIX,
    QuantType,
    find_by_name,
    model_has_infer_metadata,
    normalize_axis,
    pack_bytes_to_4bit,
    quantize_data,
    quantize_nparray,
    save_and_reload_model_with_shape_infer,
    tensor_proto_to_array,
)
from .tensor_quant_overrides import TensorQuantOverridesHelper


class QuantizationParams:
    def __init__(self, **data: Dict[str, Any]):
        self.data = {}
        for k, v in data.items():
            if not isinstance(k, str):
                raise TypeError(f"Keys must be strings, not {type(k)} for k={k!r}.")
            if not isinstance(v, (int, str, np.ndarray)):
                raise TypeError(f"Values must be numpy arrays, int, float, or str, not {type(v)} for k={k!r}.")
            if k == "scale" and v.dtype not in (np.float32, np.float16):
                raise ValueError(f"scale must be a float32 or float16 numpy element but is {v.dtype} for k={k!r}")
            self.data[k] = v

    def __iter__(self):
        yield from self.data

    def __getitem__(self, key):
        return self.data[key]

    def __len__(self):
        return len(self.data)
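
# Illustrative sketch of how a QuantizationParams instance is typically populated
# (the concrete values below are made up for the example and are not used anywhere
# in this module):
#
#     zp = np.array(0, dtype=np.int8)
#     scale = np.array(0.1, dtype=np.float32)
#     params = QuantizationParams(zero_point=zp, scale=scale, quant_type=1)
#     assert "scale" in params and len(params) == 3
#     assert params["scale"].dtype == np.float32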
d Zdd Zdd Z	dddZ
dddZdddZdd ZdS )BaseQuantizerNc                 C   s  t |st|}dd |jjD | _| jdd |jjD  | jdd |jjD  t|| _	|| _
|| _|
rv|
ni | _d| jko| jd | _d | _d| jko| jd | _| jd|tjtjtjfk| _| jdd	| _| jd
| _t|d|| _t|d|| _|d k	rLttdd | rLtdtdd | D  d|| _ || _!|| _"|	| _#| $ | _%t&| jdi | _'dd | j	( D | _)| j'*| j)| j+ |\}}|st,|| j'- | _.d S )Nc                 S   s   i | ]}|j |qS r(   name).0vir(   r(   r)   
<dictcomp>K   s      z*BaseQuantizer.__init__.<locals>.<dictcomp>c                 S   s   i | ]}|j |qS r(   r6   )r8   Zotr(   r(   r)   r:   L   s      c                 S   s   i | ]}|j |qS r(   r6   )r8   itr(   r(   r)   r:   M   s      ZEnableSubgraphZForceQuantizeNoInputCheckZWeightSymmetricZActivationSymmetricFZMinimumRealRangetensor_typec                 S   s   t | t S r+   )r   r   )tr(   r(   r)   <lambda>o       z(BaseQuantizer.__init__.<locals>.<lambda>z(tensors_range contains unexpected types c                 s   s   | ]}t |V  qd S r+   )r   )r8   r'   r(   r(   r)   	<genexpr>q   s     z)BaseQuantizer.__init__.<locals>.<genexpr>z, not TensorData.ZTensorQuantOverridesc                 S   s   i | ]}|j |qS r(   r6   )r8   Zinitzerr(   r(   r)   r:   }   s      )/r   r   graphZ
value_infoZvalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentZforce_quantize_no_input_checkgetr
   ZQInt8ZQInt16ZQFLOAT8E4M3FNis_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanymapvaluesr   settensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerZinitializersZis_validkeysr$   Zget_quant_typesZtensor_quant_override_qtypes)r%   rE   rF   rG   rQ   rP   rV   rW   rX   rY   rH   Zoverrides_validZoverrides_errr(   r(   r)   r*   <   sR    
 "
  zBaseQuantizer.__init__c                 C   s   t d S r+   )NotImplementedErrorr,   r(   r(   r)   quantize_model   s    zBaseQuantizer.quantize_modelc                 C   s   t || j }|d k	S r+   )r   rE   r]   )r%   Z

    def quantize_model(self):
        raise NotImplementedError

    def is_input_a_initializer(self, input_name):
        initializer = find_by_name(input_name, self.model.initializer())
        return initializer is not None

    def is_per_channel(self):
        return self.per_channel

    def is_valid_quantize_weight(self, weight_name):
        weight = find_by_name(weight_name, self.model.initializer())
        if weight is not None:
            return weight.data_type in (onnx.TensorProto.FLOAT, onnx.TensorProto.FLOAT16)
        if (not self.enable_subgraph_quantization) or (self.parent is None):
            return False
        return self.parent.is_valid_quantize_weight(weight_name)

    def should_quantize_node(self, node):
        if (
            self.nodes_to_quantize is not None
            and len(self.nodes_to_quantize) != 0
            and node.name not in self.nodes_to_quantize
        ):
            return False

        if node.op_type not in self.op_types_to_quantize:
            return False

        if self.nodes_to_exclude is not None and node.name in self.nodes_to_exclude:
            return False

        return True

    def check_opset_version(self):
        ai_onnx_domain = [
            opset for opset in self.model.model.opset_import if not opset.domain or opset.domain == "ai.onnx"
        ]
        if len(ai_onnx_domain) != 1:
            raise ValueError("Failed to find proper ai.onnx domain")
        opset_version = ai_onnx_domain[0].version

        if opset_version == 10:
            logging.warning(
                f"The original model opset version is {opset_version}, which does not support node fusions. "
                "Please update the model to opset >= 11 for better performance."
            )
            return 10

        if opset_version < 10:
            logging.warning(
                f"The original model opset version is {opset_version}, which does not support quantization. "
                "Please update the model to opset >= 11. Updating the model automatically to opset 11. "
                "Please verify the quantized model."
            )
            self.model.model.opset_import.remove(ai_onnx_domain[0])
            self.model.model.opset_import.extend([onnx.helper.make_opsetid("", 11)])
            opset_version = 11

        if opset_version < 19 and self.weight_qType == onnx.TensorProto.FLOAT8E4M3FN:
            logging.warning(
                f"The original model opset version is {opset_version}, which does not support quantization to float 8. "
                "Please update the model to opset >= 19. Updating the model automatically to opset 19. "
                "Please verify the quantized model."
            )
            self.model.model.opset_import.remove(ai_onnx_domain[0])
            self.model.model.opset_import.extend([onnx.helper.make_opsetid("", 19)])
            self.model.model.ir_version = 9
            opset_version = 19

        return opset_version

    def quantize_bias_static_impl(self, bias_name, input_scale, weight_scale, beta=1.0):
        """
        Quantize the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale.
        """

        # get bias
        bias_initializer = find_by_name(bias_name, self.model.initializer())
        bias_data = tensor_proto_to_array(bias_initializer)
        quantized_bias_name = bias_name + TENSOR_NAME_QUANT_SUFFIX

        if self.weight_qType == onnx.TensorProto.FLOAT8E4M3FN:
            # With float 8 weights the bias stays in float and is fed through a Cast node
            # instead of a DequantizeLinear node.
            data = np.asarray(bias_data)
            if data.dtype == np.float16:
                node_qtype = onnx.TensorProto.FLOAT16
            elif data.dtype == np.float32:
                node_qtype = onnx.TensorProto.FLOAT
            else:
                raise TypeError(f"Only float16 or float32 are supported with float 8 but bias dtype is {data.dtype}.")
            quantized_data = data.astype(np.float32)
            bias_scale = np.array([1], dtype=quantized_data.dtype)
            bias_scale_data = bias_scale.reshape(-1)
            packed_bias_initializer = onnx.numpy_helper.from_array(quantized_data, quantized_bias_name)
            self.model.initializer_extend([packed_bias_initializer])
            node_type = "Cast"
        else:
            # Quantize the bias to int32 with scale = input_scale * weight_scale (zero point is 0).
            bias_scale = input_scale * weight_scale * beta
            quantized_data = (np.asarray(bias_data) / bias_scale).round().astype(np.int32)

            # update bias initializer
            bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(bias_initializer.dims)
            packed_bias_initializer = onnx.numpy_helper.from_array(bias_np_data, quantized_bias_name)
            self.model.initializer_extend([packed_bias_initializer])
            bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1)
            node_type = "DequantizeLinear"
            node_qtype = self.weight_qType

        # update scale initializer
        quantized_bias_scale_name = quantized_bias_name + "_scale"
        packed_bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, quantized_bias_scale_name)
        self.model.initializer_extend([packed_bias_scale_initializer])

        tensor_type = (
            self.weight_qType if self.weight_qType == onnx.TensorProto.FLOAT8E4M3FN else onnx.TensorProto.INT32
        )

        # update zero-point initializer
        quantized_bias_zp_name = quantized_bias_name + "_zero_point"
        if self.weight_qType == onnx.TensorProto.FLOAT8E4M3FN:
            packed_bias_zp_initializer = onnx.helper.make_tensor(quantized_bias_zp_name, self.weight_qType, [1], [0.0])
        elif bias_scale.size > 1:
            bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32)
            packed_bias_zp_initializer = onnx.numpy_helper.from_array(bias_zp_data, quantized_bias_zp_name)
        else:
            packed_bias_zp_initializer = onnx.helper.make_tensor(quantized_bias_zp_name, tensor_type, [], [0])
        self.model.initializer_extend([packed_bias_zp_initializer])

        return (
            quantized_bias_name,
            quantized_bias_scale_name,
            quantized_bias_zp_name,
            bias_scale_data,
            node_type,
            node_qtype,
        )

    def quantize_initializer_impl(self, weight, qType, reduce_range=False, keep_float_weight=False):
        """
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to quantize scale and
                                  zero point. If keep_float_weight is False the weight is quantized; otherwise it is
                                  left in float.
        :return: quantized weight name, zero point name, scale name
        """
        q_weight_name = weight.name + TENSOR_NAME_QUANT_SUFFIX
        zp_name = weight.name + "_zero_point"
        scale_name = weight.name + "_scale"

        # Quantize weight data. Use quantization overrides if provided by the user.
        weight_data = tensor_proto_to_array(weight)
        quant_overrides = self.tensor_quant_overrides.get_per_tensor_overrides(weight.name, default_val={})
        if "quant_type" in quant_overrides:
            qType = quant_overrides["quant_type"].tensor_type

        if "scale" in quant_overrides and "zero_point" in quant_overrides:
            zero_point = np.array(quant_overrides["zero_point"], dtype=ONNX_TYPE_TO_NP_TYPE[qType])
            scale = np.array(quant_overrides["scale"])
            q_weight_data = quantize_nparray(qType, weight_data.flatten(), scale, zero_point)
        else:
            _, _, zero_point, scale, q_weight_data = quantize_data(
                weight_data.flatten(),
                qType,
                quant_overrides.get("symmetric", self.is_weight_symmetric),
                reduce_range=quant_overrides.get("reduce_range", self.reduce_range and reduce_range),
                min_real_range=self.min_real_range,
                rmin_override=quant_overrides.get("rmin"),
                rmax_override=quant_overrides.get("rmax"),
            )

        assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}"
        assert zero_point.dtype not in (np.float32, np.float16), f"Unexpected dtype {zero_point.dtype}"
        assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}"
        assert isinstance(q_weight_data, np.ndarray), f"Unexpected type {type(q_weight_data)}"

        scale_dtype = weight.data_type
        scale_initializer = onnx.helper.make_tensor(scale_name, scale_dtype, [], scale.reshape((-1,)).tolist())
        zero_initializer = onnx.helper.make_tensor(zp_name, qType, [], zero_point.reshape((-1,)).tolist())
        self.model.initializer_extend([scale_initializer, zero_initializer])

        if not keep_float_weight:
            if self.weight_qType == onnx.TensorProto.FLOAT8E4M3FN:
                q_weight_initializer = onnx.TensorProto()
                q_weight_initializer.data_type = self.weight_qType
                q_weight_initializer.dims.extend(weight.dims)
                q_weight_initializer.name = q_weight_name
                # flatten().copy() so the serialized buffer is contiguous and owned by this initializer.
                q_weight_initializer.raw_data = q_weight_data.flatten().copy().tobytes()
                if to_array_extended is not None:
                    # Sanity check that the float 8 weights round-trip through the initializer.
                    check = to_array_extended(q_weight_initializer)
                    if check.shape != weight_data.shape or check.tobytes() != q_weight_data.tobytes():
                        raise RuntimeError(
                            f"The initializer of shape {weight_data.shape} could not be created, expecting "
                            f"{q_weight_data.tobytes()[:10]}, got {check.tobytes()[:10]} and shape={check.shape}"
                            f"\nraw={str(q_weight_initializer)[:200]}."
                        )
            elif qType in (onnx.TensorProto.INT4, onnx.TensorProto.UINT4):
                if q_weight_data.dtype not in (np.int8, np.uint8):
                    raise RuntimeError(
                        f"Quantized weights for {q_weight_name} must be 8-bit before packing as 4-bit values."
                    )
                # Pack pairs of 8-bit values into 4-bit nibbles before serializing as raw data.
                packed_data = bytes(pack_bytes_to_4bit(q_weight_data.tobytes()))
                q_weight_initializer = onnx.helper.make_tensor(q_weight_name, qType, weight.dims, packed_data, raw=True)
            else:
                q_weight_data = np.asarray(
                    q_weight_data, dtype=onnx.helper.tensor_dtype_to_np_dtype(qType)
                ).reshape(weight.dims)
                q_weight_initializer = onnx.numpy_helper.from_array(q_weight_data, q_weight_name)
            self.model.initializer_extend([q_weight_initializer])

        return q_weight_name, zp_name, scale_name

    def quantize_weight_per_channel_impl(
        self,
        weight_name,
        weight_qType,
        channel_axis,
        reduce_range=True,
        keep_float_weight=False,
    ):
        initializer = find_by_name(weight_name, self.model.initializer())
        if initializer is None:
            raise ValueError("{} is not an initializer".format(weight_name))

        weights = tensor_proto_to_array(initializer)
        weights_rank = len(weights.shape)
        is_axis_valid, axis_norm = normalize_axis(channel_axis, weights_rank)
        if not is_axis_valid:
            raise ValueError(
                f"Weight {weight_name} has a per-channel axis with value {channel_axis} that is "
                f"out-of-bounds for rank {weights_rank}"
            )
        channel_axis = axis_norm
        channel_count = weights.shape[channel_axis]

        quant_overrides_for_channels = self.tensor_quant_overrides.get_per_channel_overrides(
            weight_name, default_val=[{"axis": channel_axis}]
        )
        num_channel_overrides = len(quant_overrides_for_channels)
        if num_channel_overrides != 1 and num_channel_overrides != channel_count:
            raise ValueError(
                f"Per-channel tensor quantization overrides for {weight_name} must have either 1 or "
                f"{channel_count} elements in the list of dictionaries."
            )

        is_axis_override_valid, axis_override = normalize_axis(quant_overrides_for_channels[0]["axis"], weights_rank)
        if not is_axis_override_valid or axis_override != channel_axis:
            raise ValueError(
                f"Tensor quantization overrides for {weight_name} specify an unexpected axis. "
                f"Expected {channel_axis}, but got {quant_overrides_for_channels[0]['axis']}."
            )

        # If the user provides per-channel overrides, the quant_type, symmetric, and reduce_range values of
        # the first channel apply to all channels.
        if "quant_type" in quant_overrides_for_channels[0]:
            weight_qType = quant_overrides_for_channels[0]["quant_type"].tensor_type
        symmetric = quant_overrides_for_channels[0].get(
            "symmetric",
            self.is_weight_symmetric
            or weight_qType in (onnx.TensorProto.INT8, onnx.TensorProto.FLOAT8E4M3FN, onnx.TensorProto.INT4),
        )
        reduce_range = quant_overrides_for_channels[0].get("reduce_range", self.reduce_range and reduce_range)

        zero_point_list = []
        scale_list = []
        quantized_per_channel_data_list = []
        for i in range(channel_count):
            per_channel_data = weights.take(i, channel_axis)
            channel_override_index = i if i < num_channel_overrides else 0
            channel_quant_overrides = quant_overrides_for_channels[channel_override_index]

            if "scale" in channel_quant_overrides and "zero_point" in channel_quant_overrides:
                zero_point = np.array(channel_quant_overrides["zero_point"], dtype=ONNX_TYPE_TO_NP_TYPE[weight_qType])
                scale = np.array(channel_quant_overrides["scale"])
                quantized_per_channel_data = quantize_nparray(
                    weight_qType, per_channel_data.flatten(), scale, zero_point
                )
            else:
                _, _, zero_point, scale, quantized_per_channel_data = quantize_data(
                    per_channel_data.flatten(),
                    weight_qType,
                    symmetric,
                    reduce_range=reduce_range,
                    min_real_range=self.min_real_range,
                    rmin_override=channel_quant_overrides.get("rmin"),
                    rmax_override=channel_quant_overrides.get("rmax"),
                )

            assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}"
            assert zero_point.dtype not in (np.float32, np.float16), f"Unexpected dtype {zero_point.dtype}"
            assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}"
            assert isinstance(quantized_per_channel_data, np.ndarray), f"Unexpected type {type(quantized_per_channel_data)}"

            zero_point_list.append(zero_point)
            scale_list.append(scale)
            quantized_per_channel_data_list.append(quantized_per_channel_data)

        # Combine the per-channel data into one tensor: each quantized channel is reshaped so that the
        # channel axis has size 1, then all channels are concatenated along that axis.
        weights_shape = list(weights.shape)
        reshape_dims = list(weights_shape)
        reshape_dims[channel_axis] = 1
        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
        for i in range(1, len(quantized_per_channel_data_list)):
            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
            quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)

        q_weight_name = weight_name + TENSOR_NAME_QUANT_SUFFIX
        zp_name = weight_name + "_zero_point"
        scale_name = weight_name + "_scale"

        # Update packed weight, zero point, and scale initializers.
        zero_scale_shape = [initializer.dims[channel_axis]]
        scale_initializer = onnx.helper.make_tensor(
            scale_name, initializer.data_type, zero_scale_shape, np.hstack(scale_list).tolist()
        )
        zero_initializer = onnx.helper.make_tensor(
            zp_name, weight_qType, zero_scale_shape, np.hstack(zero_point_list).tolist()
        )
        self.model.initializer_extend([scale_initializer, zero_initializer])

        if not keep_float_weight:
            if weight_qType in (onnx.TensorProto.INT4, onnx.TensorProto.UINT4):
                if quantized_weights.dtype not in (np.int8, np.uint8):
                    raise RuntimeError(
                        f"Quantized weights for {q_weight_name} must be 8-bit before packing as 4-bit values."
                    )
                packed_data = bytes(pack_bytes_to_4bit(quantized_weights.tobytes()))
                q_weight_initializer = onnx.helper.make_tensor(
                    q_weight_name, weight_qType, weights_shape, packed_data, raw=True
                )
                self.model.initializer_extend([q_weight_initializer])
            else:
                quantized_weights = np.asarray(
                    quantized_weights, dtype=onnx.helper.tensor_dtype_to_np_dtype(weight_qType)
                ).reshape(initializer.dims)
                q_weight_initializer = onnx.numpy_helper.from_array(quantized_weights, q_weight_name)
                self.model.initializer_extend([q_weight_initializer])

        return q_weight_name, zp_name, scale_name

    def adjust_tensor_ranges(self):
        if self.tensors_range is None:
            return

        for node in self.model.nodes():
            # Propagate the output range of Clip/Relu back to their input so both sides share quantization
            # parameters (only when activations are asymmetric and the input feeds exactly one node).
            if node.op_type in ["Clip", "Relu"]:
                if self.is_activation_symmetric:
                    continue
                if not self.should_quantize_node(node):
                    continue
                if len(self.model.input_name_to_nodes()[node.input[0]]) != 1:
                    continue
                if node.input[0] not in self.tensors_range or node.output[0] not in self.tensors_range:
                    continue
                td = self.tensors_range[node.output[0]]
                if not isinstance(td, TensorData):
                    raise TypeError(f"Unexpected type {type(td)} for {node.output[0]!r}.")
                self.tensors_range[node.input[0]] = td
            # Softmax outputs are always in [0, 1].
            elif node.op_type == "Softmax":
                self.tensors_range[node.output[0]] = TensorData(lowest=np.float32(0.0), highest=np.float32(1.0))