# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.  All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from __future__ import annotations

import logging
from dataclasses import dataclass
from enum import Enum
from typing import Any

import numpy as np
import onnx
import onnx.numpy_helper
from onnx import TensorProto
from onnx import onnx_pb as onnx_proto

from .base_quantizer import BaseQuantizer, QuantizationParams
from .calibrate import TensorData
from .quant_utils import (
    DEQUANT_OP_NAME,
    QUANT_OP_NAME,
    QuantizedValue,
    QuantizedValueType,
    __producer__,
    __version__,
    add_dequant_output_suffix,
    add_dequant_suffix,
    add_quant_input_suffix,
    add_quant_output_suffix,
    add_quant_suffix,
    compute_scale_zp,
    compute_scale_zp_float8,
    find_by_name,
    get_qmin_qmax_for_qType,
    ms_domain,
    normalize_axis,
    tensor_proto_to_array,
)
from .registry import CreateQDQQuantizer


class QDQQuantTensorType(Enum):
    ACTIVATION = 0
    WEIGHT = 1
    BIAS = 2

# Describes the source of a tensor's quantization parameters: the input `input_name`
# of node `node_name` already carries the scale/zero-point to share.
@dataclass
class QDQQuantParamProvider:
    input_name: str
    node_name: str


# Holds quantization information for a tensor that has been marked for quantization.
class QDQTensorQuantInfo:
    def __init__(self, tensor_type=QDQQuantTensorType.ACTIVATION, quant_para_provider=None, axis=None, data_type=None):
        self.tensor_type = tensor_type
        self.quant_para_provider = quant_para_provider
        self.axis = axis
        self.is_shared = quant_para_provider is not None
        assert data_type is not None
        self.data_type = data_type


# Holds the information necessary to quantize a bias with
# bias_zero_point = 0 and bias_scale = input_scale * weight_scale * beta.
@dataclass
class QDQBiasQuantInfo:
    node_name: str
    input_name: str
    weight_name: str
    beta: float


# Holds quantization parameters for a tensor, including the optional parameters for the
# type it is converted to and the consumers that receive the converted type.
@dataclass
class QDQTensorQuantParams:
    original: QuantizationParams  # Generated by the tensor's producer.
    converted: QuantizationParams | None  # Converted type consumed by some (or all) consumer nodes.
    converted_recv_nodes: set[str] | None  # The names of the nodes that consume the converted type.


# Holds the scale and zero-point initializers created for one set of quantization parameters.
@dataclass
class QDQScaleZpInitializers:
    scale: TensorProto
    zero_point: TensorProto


# Holds the original scale/zero-point initializers for a tensor, plus the converted
# initializers if the tensor's quantized type is converted for some consumers.
@dataclass
class QDQTensorScaleZpInitializers:
    original: QDQScaleZpInitializers
    converted: QDQScaleZpInitializers | None
    converted_recv_nodes: set[str] | None


# Holds the quantized values for a tensor: the original value produced upstream and,
# if the tensor's quantized type is converted, the converted value and its consumers.
@dataclass
class QDQTensorQuantizedValue:
    original: QuantizedValue
    converted: QuantizedValue | None
    converted_recv_nodes: set[str] | None

    def get_for_consumer(self, consumer_node_name) -> QuantizedValue:
        if self.converted is None:  # Quantized value is not converted, return the original.
            return self.original

        if self.converted_recv_nodes is None:  # All consumers receive the converted value.
            return self.converted

        # Only a subset of consumers receive the converted value; all other consumers
        # receive the original value generated by the tensor's producer.
        return self.converted if (consumer_node_name in self.converted_recv_nodes) else self.original

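# A minimal sketch of the dispatch performed by get_for_consumer above. The tensor and
# node names are hypothetical; QuantizedValue objects are normally created by the
# quantizer itself while it rewrites the graph:
#
#   qv = QDQTensorQuantizedValue(
#       original=u8_value,                  # produced as uint8
#       converted=u16_value,                # converted to uint16 for selected consumers
#       converted_recv_nodes={"MatMul_1"},  # only MatMul_1 consumes the converted type
#   )
#   qv.get_for_consumer("MatMul_1")  # -> u16_value
#   qv.get_for_consumer("Relu_0")    # -> u8_value
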
class QDQQuantizer(BaseQuantizer):
    def __init__(
        self,
        model,
        per_channel,
        reduce_range,
        weight_qType,
        activation_qType,
        tensors_range,
        nodes_to_quantize,
        nodes_to_exclude,
        op_types_to_quantize,
        extra_options=None,
    ):
        BaseQuantizer.__init__(
            self,
            model,
            per_channel,
            reduce_range,
            weight_qType,
            activation_qType,
            tensors_range,
            nodes_to_quantize,
            nodes_to_exclude,
            op_types_to_quantize,
            extra_options,
        )
 i | _i | _g | _|
dg | _|
dd| _|
dd| _|
dd| _	i | _
|
di | _|
ddrtnd | _|
d	d| _| jd
k rtjtjtjtjf t fdd| jD }| js| j ks| j ks|rtdt d t| _|  | _i | _d S )NZ"OpTypesToExcludeOutputQuantizationZAddQDQPairToWeightFZQuantizeBiasTZDedicatedQDQPairZ QDQOpTypePerChannelSupportToAxisZUseQDQContribOpsZQDQKeepRemovableActivations   c                 3  s   | ]}|j  kV  qd S r/   )r0   ).0tZopset21_typesr&   r'   	<genexpr>   s    z(QDQQuantizer.__init__.<locals>.<genexpr>zONNX QuantizeLinear and DequantizeLinear operators do not support 16-bit/4-bit integer quantization types prior to opset 21. The domain of QuantizeLinear and DequantizeLinear operators will be set to 'z' to enable support.)r	   r7   tensors_to_quantizebias_to_quantizenodes_to_removegetZ'op_types_to_exclude_output_quantizationadd_qdq_pair_to_weightquantize_biasdedicated_qdq_pairtensor_to_its_receiving_nodes'qdq_op_type_per_channel_support_to_axisr   qdq_op_domainZqdq_keep_removable_activationsopset_versionr   ZUINT16ZINT16ZUINT4ZINT4anyZtensor_quant_override_qtypesactivation_qTypeweight_qTypeloggingwarningcalc_graph_quant_paramsquantization_paramsquantized_value_map)r6   modelper_channelreduce_ranger\   r[   tensors_rangeZnodes_to_quantizeZnodes_to_excludeZop_types_to_quantizeZextra_optionsZoverrides_have_opset21_typesr&   rM   r'   r7      sT    

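    # A sketch of the `extra_options` entries consumed above (the keys mirror the .get()
    # calls in __init__); the values shown are illustrative, not defaults:
    #
    #   extra_options = {
    #       "OpTypesToExcludeOutputQuantization": ["Relu", "Clip"],
    #       "AddQDQPairToWeight": True,        # keep float weights behind explicit Q/DQ pairs
    #       "QuantizeBias": False,
    #       "DedicatedQDQPair": True,          # one Q/DQ pair per consumer node
    #       "QDQOpTypePerChannelSupportToAxis": {"MatMul": 1},
    #       "UseQDQContribOps": True,          # emit Q/DQ in the com.microsoft domain
    #       "QDQKeepRemovableActivations": True,
    #   }
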
zQDQQuantizer.__init__c                 C  sL   t || j }|dk	r|jS || jkrH| j| }|jdrH|jjjS dS )2
        Check if tensor can be quantized
        Nr0   )	r   rb   initializerr5   value_infostypeHasFieldr0   	elem_typer6   tensor_nameweightvir&   r&   r'   _get_tensor_type   s    


zQDQQuantizer._get_tensor_typec                 C  s   t || j }|dk	r4|jtjjtjjfkrdS nN|| jkrp| j| }|j	
dr|j	jjtjtjfkrdS ntd| d dS )rf   NTr0   z$failed to infer the type of tensor: z6. Skip to quantize it. Please check if it is expected.F)r   rb   rg   r5   
onnx_protor   FLOATFLOAT16rh   ri   rj   r0   rk   r]   r^   rl   r&   r&   r'   _is_tensor_quantizable   s    


    def __quantize_tensor(self, tensor_name, quant_sharing_provider=None, tensor_type=QDQQuantTensorType.ACTIVATION):
        """
        Adds a tensor to the list (actually a dict) of tensors to quantize. Called indirectly by op quantizers that
        want to quantize a tensor (i.e., "mark" a tensor for quantization).

        If quant_sharing_provider is not None, the tensor with name tensor_name will be quantized with the same
        quantization parameters as the node input specified in quant_sharing_provider. Ex: A Transpose node's output
        will typically use the same quantization parameter initializers used at the Transpose node's input.

        Args:
            tensor_name: name of the tensor to quantize
            quant_sharing_provider: name of the tensor and node that provides the quantization parameters
            tensor_type: QDQQuantTensorType, defaults to ACTIVATION
        """
        if self._is_tensor_quantizable(tensor_name):
            if quant_sharing_provider:
                if not isinstance(quant_sharing_provider, QDQQuantParamProvider):
                    raise TypeError(
                        f"quant_sharing_provider must be of type QDQQuantParamProvider, "
                        f"not {type(quant_sharing_provider)}."
                    )

                data_type = self._get_tensor_type(tensor_name)
                self.tensors_to_quantize[tensor_name] = QDQTensorQuantInfo(
                    tensor_type=tensor_type, quant_para_provider=quant_sharing_provider, data_type=data_type
                )
            elif tensor_name not in self.tensors_to_quantize:
                data_type = self._get_tensor_type(tensor_name)
                self.tensors_to_quantize[tensor_name] = QDQTensorQuantInfo(
                    tensor_type=tensor_type, data_type=data_type
                )

    def quantize_activation_tensor(self, tensor_name: str):
        """
        Adds a tensor to the list of tensors to quantize. Called by op quantizers that
        want to quantize a tensor (i.e., "mark" a tensor for quantization).

        Args:
            tensor_name: name of the tensor to quantize
        """
        return self.__quantize_tensor(tensor_name, None, QDQQuantTensorType.ACTIVATION)

    def quantize_output_same_as_input(self, output_name: str, input_name: str, node_name: str):
        """
        Adds a tensor to the list of tensors to quantize. Called by op quantizers that
        want to quantize an output tensor using the same quantization parameters as one of the node's inputs.

        Ex: A Transpose node's output will typically use the same quantization parameter initializers used at
        the Transpose node's input.

        Args:
            output_name: name of the node output to quantize so that it uses the same quantization params as an input.
            input_name: name of the node input from which the output tensor will get its quantization params.
            node_name: name of the node that consumes `input_name`.
        """
        return self.__quantize_tensor(
            output_name, QDQQuantParamProvider(input_name, node_name), QDQQuantTensorType.ACTIVATION
        )

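    # For example, an op quantizer handling a hypothetical Transpose node "transpose_0"
    # (input "x", output "y") would typically call
    #
    #   self.quantizer.quantize_output_same_as_input("y", "x", "transpose_0")
    #
    # so that "y" reuses the scale/zero-point initializers computed for "x".
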
    def quantize_weight_tensor(self, tensor_name: str):
        """
        Adds a tensor to the list of weight tensors to quantize. Called by op quantizers that
        want to quantize a weight (i.e., "mark" a weight for quantization).

        Args:
            tensor_name: name of the weight to quantize
        """
        return self.__quantize_tensor(tensor_name, None, QDQQuantTensorType.WEIGHT)

    def quantize_weight_tensor_per_channel(self, tensor_name, axis):
        weight = find_by_name(tensor_name, self.model.initializer())
        if weight:
            if weight.data_type in (onnx_proto.TensorProto.FLOAT, onnx_proto.TensorProto.FLOAT16):
                self.tensors_to_quantize[tensor_name] = QDQTensorQuantInfo(
                    tensor_type=QDQQuantTensorType.WEIGHT, axis=axis, data_type=weight.data_type
                )
        else:
            logging.warning(
                f"only support per-channel quantization on weight. Tensor: {tensor_name} is not quantized."
            )

    def quantize_bias_tensor(self, node_name, bias_name, input_name, weight_name, beta=1.0):
        """
        Adds a bias tensor to the list of bias tensors to quantize. Called by op quantizers that
        want to quantize a bias with bias_zero_point = 0 and bias_scale = input_scale * weight_scale * beta.
        The bias is added to the integer accumulator of input * weight products, whose effective scale
        is input_scale * weight_scale; beta is an optional extra multiplier on that scale.

        Args:
            node_name: name of the node that consumes the bias, input, and weight tensors.
            bias_name: name of the bias tensor to quantize.
            input_name: name of the input tensor whose scale is used to compute the bias's scale.
            weight_name: name of the weight tensor whose scale is used to compute the bias's scale.
            beta: Multiplier used to compute the bias's scale.
        """
        # If the user provided quantization overrides for this tensor, treat it as a regular weight.
        if self.tensor_quant_overrides.get(bias_name):
            logging.info(
                f"Quantizing bias tensor '{bias_name}' as a weight due to the presence of user-specified overrides"
            )
            is_per_channel, axis = self.is_tensor_per_channel(bias_name, default_axis=0)
            if is_per_channel:
                self.quantize_weight_tensor_per_channel(bias_name, axis)
            else:
                self.quantize_weight_tensor(bias_name)
            return

        weight = find_by_name(bias_name, self.model.initializer())
        if weight is not None:
            if weight.data_type in (onnx_proto.TensorProto.FLOAT, onnx_proto.TensorProto.FLOAT16):
                if bias_name not in self.bias_to_quantize:
                    self.bias_to_quantize[bias_name] = QDQBiasQuantInfo(node_name, input_name, weight_name, beta)
                else:
                    logging.warning(f"Bias {bias_name} has already been marked for quantization")
        else:
            logging.warning(f"Expected {bias_name} to be a weight")

    def remove_node(self, node):
        self.nodes_to_remove.append(node)

    def remove_nodes(self):
        self.model.remove_nodes(self.nodes_to_remove)

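    # A worked example of the bias formula used by quantize_bias_tensor above, with
    # illustrative numbers: for input_scale = 0.02, weight_scale = 0.005 and beta = 1.0,
    #
    #   bias_scale      = input_scale * weight_scale * beta = 0.02 * 0.005 * 1.0 = 1e-4
    #   bias_zero_point = 0
    #   q_bias          = round(bias / bias_scale)   # typically stored as int32
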
t| |}|  |jD ](}|| jkrHg | j|< | j| | q0q
|   | 	  | j
rz|   |   | js| j   t| j j _t| j j _| jtkr| j td | j j S )Nr   )rb   ZnodesZshould_quantize_noder   quantizeinputrV   r   _quantize_normal_tensors_quantize_sharing_param_tensorsrT   _quantize_bias_tensorsr   rS   Zclean_initializersr   Zproducer_namer   Zproducer_versionrX   r   Zset_opset_import)r6   r   Zop_quantizerrm   r&   r&   r'   quantize_modelt  s(    








zQDQQuantizer.quantize_modelc                 C  s   || j kr|| j | jd kr|| j | jd kr|t| j | dkr|| j|s|| j|s|| j|| || jkrx| j|= dS dS )Nr   TF)	r`   r>   lenrb   Zinput_name_to_nodesis_graph_outputZis_graph_inputreplace_output_of_all_nodesrO   )r6   Zupstream_output_namer{   r&   r&   r'   try_replacing_upstream_output  s"    


z*QDQQuantizer.try_replacing_upstream_outputz
int | None)q_inputq_outputquant_node_name
scale_namezp_namer2   c                 C  s4   t jjt|||g|g||| jd}| j|g dS )zI
    def _create_q_node(
        self,
        q_input: str,
        q_output: str,
        quant_node_name: str,
        scale_name: str,
        zp_name: str,
        axis: int | None = None,
    ):
        """
        Creates a QuantizeLinear node and adds it to the model.
        """
        qlinear_node = onnx.helper.make_node(
            QUANT_OP_NAME,
            [q_input, scale_name, zp_name],
            [q_output],
            quant_node_name,
            axis=axis,
            domain=self.qdq_op_domain,
        )
        self.model.add_nodes([qlinear_node])

    def _create_dq_node(
        self,
        dq_input: str,
        dq_output: str,
        dequant_node_name: str,
        scale_name: str,
        zp_name: str,
        axis: int | None = None,
    ):
        """
        Creates a DequantizeLinear node and adds it to the model.
        """
        dequant_node = onnx.helper.make_node(
            DEQUANT_OP_NAME,
            [dq_input, scale_name, zp_name],
            [dq_output],
            dequant_node_name,
            axis=axis,
            domain=self.qdq_op_domain,
        )
        self.model.add_nodes([dequant_node])

    def _create_qdq_nodes(
        self, q_input, q_output, quant_node_name, dq_input, dq_output, dequant_node_name, scale_name, zp_name, axis=None
    ):
        qlinear_node = onnx.helper.make_node(
            QUANT_OP_NAME,
            [q_input, scale_name, zp_name],
            [q_output],
            quant_node_name,
            axis=axis,
            domain=self.qdq_op_domain,
        )
        dequant_node = onnx.helper.make_node(
            DEQUANT_OP_NAME,
            [dq_input, scale_name, zp_name],
            [dq_output],
            dequant_node_name,
            axis=axis,
            domain=self.qdq_op_domain,
        )
        self.model.add_nodes([qlinear_node, dequant_node])

    def _add_qdq_pair_for_initializer(self, weight_proto, tensor_type, axis=None):
        weight_name = weight_proto.name
        if axis is not None:
            if self.opset_version < 13:
                raise ValueError("Per-Channel support with QDQ format requires onnx opset version 13 or above.")

            qtype = self.weight_qType if tensor_type is QDQQuantTensorType.WEIGHT else self.activation_qType
            if qtype == onnx.onnx_pb.TensorProto.UINT8:
                qtype = onnx_proto.TensorProto.INT8

            q_weight_name, zp_name, scale_name = self.quantize_weight_per_channel(
                weight_name,
                qtype,
                axis,
                keep_float_weight=self.add_qdq_pair_to_weight,
            )
        else:
            q_weight_name, zp_name, scale_name = self.quantize_initializer(
                weight_proto,
                self.weight_qType if tensor_type is QDQQuantTensorType.WEIGHT else self.activation_qType,
                keep_float_weight=self.add_qdq_pair_to_weight,
            )

        weight_dequant_output = add_dequant_output_suffix(weight_name)
        self.model.replace_input_of_all_nodes(weight_name, weight_dequant_output)
        if self.add_qdq_pair_to_weight:
            # Keep the float weight and wrap it in an explicit Q -> DQ pair.
            weight_quant_output = add_quant_output_suffix(weight_name)
            self._create_qdq_nodes(
                weight_name,
                weight_quant_output,
                add_quant_suffix(weight_name),
                weight_quant_output,
                weight_dequant_output,
                add_dequant_suffix(weight_name),
                scale_name,
                zp_name,
                axis,
            )
        else:
            # Replace the float weight with its quantized initializer followed by a DQ op.
            dequant_node = onnx.helper.make_node(
                DEQUANT_OP_NAME,
                [q_weight_name, scale_name, zp_name],
                [weight_dequant_output],
                add_dequant_suffix(weight_name),
                axis=axis,
                domain=self.qdq_op_domain,
            )
            self.model.add_node(dequant_node)

    def _add_qdq_pair_for_activation(self, tensor_name, scale_name, zp_name, data_type=None):
        if (
            self.dedicated_qdq_pair
            and tensor_name in self.tensor_to_its_receiving_nodes
            and len(self.tensor_to_its_receiving_nodes[tensor_name]) > 1
        ):
            # Create a dedicated Q/DQ pair for each consumer of this tensor.
            num_dedicated_qdq_pair = len(self.tensor_to_its_receiving_nodes[tensor_name])
            for i in range(num_dedicated_qdq_pair):
                postfix = f"_{i + 1}"
                tensor_name_quant_output_postfix = add_quant_output_suffix(tensor_name) + postfix
                tensor_name_dequant_output_postfix = add_dequant_output_suffix(tensor_name) + postfix
                quant_node_name_postfix = add_quant_suffix(tensor_name) + postfix
                dequant_node_name_postfix = add_dequant_suffix(tensor_name) + postfix
                self._create_qdq_nodes(
                    tensor_name,
                    tensor_name_quant_output_postfix,
                    quant_node_name_postfix,
                    tensor_name_quant_output_postfix,
                    tensor_name_dequant_output_postfix,
                    dequant_node_name_postfix,
                    scale_name,
                    zp_name,
                )

                node = self.tensor_to_its_receiving_nodes[tensor_name][i]
                self.model.replace_node_input(node, tensor_name, tensor_name_dequant_output_postfix)
                if i == 0:
                    quantized_value = QuantizedValue(
                        tensor_name,
                        tensor_name_dequant_output_postfix,
                        scale_name,
                        zp_name,
                        QuantizedValueType.Input,
                        scale_type=data_type,
                    )
                    self.quantized_value_map[tensor_name] = QDQTensorQuantizedValue(quantized_value, None, None)
        else:
            q_input = tensor_name
            dq_output = add_dequant_output_suffix(tensor_name)
            if self.model.is_graph_output(tensor_name):
                q_input = add_quant_input_suffix(tensor_name)
                dq_output = tensor_name
                self.model.replace_output_of_all_nodes(tensor_name, q_input)
            else:
                self.model.replace_input_of_all_nodes(tensor_name, dq_output)

            self._create_qdq_nodes(
                q_input,
                add_quant_output_suffix(tensor_name),
                add_quant_suffix(tensor_name),
                add_quant_output_suffix(tensor_name),
                dq_output,
                add_dequant_suffix(tensor_name),
                scale_name,
                zp_name,
            )

            quantized_value = QuantizedValue(
                tensor_name,
                dq_output,
                scale_name,
                zp_name,
                QuantizedValueType.Input,
                scale_type=data_type,
            )
            self.quantized_value_map[tensor_name] = QDQTensorQuantizedValue(quantized_value, None, None)

    def _add_qdq_ops_for_converted_activation(
        self,
        tensor_name,
        first_scale_name,
        first_zp_name,
        scale_data_type,
        convert_scale_name,
        convert_zp_name,
        convert_recv_nodes,
    ):
        """
        Adds Q and DQ ops to a tensor whose quantized data type is converted. That is, some consumers may use the
        original data type from the producer, while other consumers use the converted data type.
        This is generally done by adding a sequence of ops that convert from one data type (e.g., uint8) to another (e.g., uint16).

        T_float ---> Quant(to u8) ---> Convert(to u16) ---> Dequant(to float) ---> T_float'
        where Convert(to u16) is equivalent to: ---> Dequant(to float) ---> Quant(to u16) --->

        This function handles the following scenarios:

        1) Tensor T is not a graph output; all consumers use the converted type

            <Producer> ---> Q1 ---> DQ1 ---> Q2 ---> DQ2 ---> <Consumers>

        2) Tensor T is not a graph output; some consumers use the original type, others use the converted type

            <Producer> ---> Q1 -+-> DQ1 ---> <Consumers of original type>
                                |
                                +-> DQ1' ---> Q2 ---> DQ2 ---> <Consumers of converted type>

        3) Tensor T is a graph output; all consumers use the converted type

            <Producer> ---> Q1 ---> DQ1 ---> Q2 ---> DQ2 -+-> <Consumers>
                                                          |
                                                          +-> <Graph output>

        4) Tensor T is a graph output; some consumers use the original type, others use the converted type

            <Producer> ---> Q1 -+-> DQ1 -+-> <Consumers of original type>
                                |        |
                                |        +-> <Graph output>
                                |
                                +-> DQ1' ---> Q2 ---> DQ2 ---> <Consumers of converted type>

        5) Tensor T is a graph output that is not consumed by any other nodes.

            <Producer> ---> Q1 ---> DQ1 ---> Q2 ---> DQ2 ---> <Graph output>
        """
        tensor_recv_nodes = set([node.name for node in self.tensor_to_its_receiving_nodes.get(tensor_name, [])])

        if (
            self.dedicated_qdq_pair
            and tensor_name in self.tensor_to_its_receiving_nodes
            and len(self.tensor_to_its_receiving_nodes[tensor_name]) > 1
        ):
            raise ValueError(
                "Do not currently support converted quant_types in TensorQuantOverrides "
                "when the `dedicated_qdq_pair` extra_option is enabled"
            )

        # Determine which consumers use the original type and which use the converted type.
        original_recv_nodes = tensor_recv_nodes
        if convert_recv_nodes is None:  # Converted type is consumed by all of the tensor's consumers.
            convert_recv_nodes = tensor_recv_nodes
            original_recv_nodes = set()
        else:
            original_recv_nodes = original_recv_nodes - convert_recv_nodes

        all_use_converted = len(convert_recv_nodes) == len(tensor_recv_nodes)
        is_graph_output = self.model.is_graph_output(tensor_name)

        # Create the first Q op.
        first_q_input = tensor_name
        if is_graph_output:
            first_q_input = add_quant_input_suffix(tensor_name)
            self.model.replace_output_of_all_nodes(tensor_name, first_q_input)

        first_q_output = add_quant_output_suffix(tensor_name)
        self._create_q_node(
            first_q_input, first_q_output, add_quant_suffix(tensor_name), first_scale_name, first_zp_name
        )

        # Create the first DQ op.
        first_dq_output = add_dequant_output_suffix(tensor_name)
        if is_graph_output and not all_use_converted:
            first_dq_output = tensor_name
        if original_recv_nodes and first_dq_output != tensor_name:
            self.model.replace_input_of_nodes(tensor_name, first_dq_output, original_recv_nodes)
        self._create_dq_node(
            first_q_output, first_dq_output, add_dequant_suffix(tensor_name), first_scale_name, first_zp_name
        )

        # If not all consumers use the converted type, clone the first DQ op so the
        # conversion branch does not disturb consumers of the original type.
        second_q_input = first_dq_output
        if not all_use_converted:
            second_q_input = add_quant_input_suffix(f"{tensor_name}_convert")
            self._create_dq_node(
                first_q_output,
                second_q_input,
                add_dequant_suffix(f"{tensor_name}_convert_clone"),
                first_scale_name,
                first_zp_name,
            )

        # Create the second Q op (quantizes to the converted type).
        second_q_output = add_quant_output_suffix(f"{tensor_name}_convert")
        self._create_q_node(
            second_q_input,
            second_q_output,
            add_quant_suffix(f"{tensor_name}_convert"),
            convert_scale_name,
            convert_zp_name,
        )

        # Create the second DQ op.
        second_dq_output = add_dequant_output_suffix(f"{tensor_name}_convert")
        if is_graph_output and all_use_converted:
            second_dq_output = tensor_name
        if convert_recv_nodes and second_dq_output != tensor_name:
            self.model.replace_input_of_nodes(tensor_name, second_dq_output, convert_recv_nodes)
        self._create_dq_node(
            second_q_output,
            second_dq_output,
            add_dequant_suffix(f"{tensor_name}_convert"),
            convert_scale_name,
            convert_zp_name,
        )

        # Store both quantized values in the map.
        original_quantized_value = QuantizedValue(
            tensor_name,
            first_dq_output,
            first_scale_name,
            first_zp_name,
            QuantizedValueType.Input,
            scale_type=scale_data_type,
        )
        converted_quantized_value = QuantizedValue(
            tensor_name,
            second_dq_output,
            convert_scale_name,
            convert_zp_name,
            QuantizedValueType.Input,
            scale_type=scale_data_type,
        )
        self.quantized_value_map[tensor_name] = QDQTensorQuantizedValue(
            original_quantized_value, converted_quantized_value, convert_recv_nodes
        )

    def _quantize_normal_tensors(self):
        """
        Adds Q/DQ ops to tensors (activations and weights) that have been marked for quantization by op quantizers.
        """
        for tensor_name, tensor_info in self.tensors_to_quantize.copy().items():
            if tensor_name in self.quantized_value_map:
                continue

            if not tensor_info.is_shared:
                # Quantize the input.
                initializer = find_by_name(tensor_name, self.model.initializer())
                if initializer:
                    self._add_qdq_pair_for_initializer(initializer, tensor_info.tensor_type, tensor_info.axis)
                else:
                    tensor_qparam_initializers = self._make_tensor_scale_zp_initializers(tensor_name)
                    if not tensor_qparam_initializers:
                        raise ValueError(
                            f"Quantization parameters are not specified for param {tensor_name}. "
                            "In static mode quantization params for inputs and outputs of nodes "
                            "to be quantized are required."
                        )

                    if tensor_qparam_initializers.converted is None:
                        # Normal case: <producer> --> Q --> DQ --> <consumers>
                        self._add_qdq_pair_for_activation(
                            tensor_name,
                            tensor_qparam_initializers.original.scale.name,
                            tensor_qparam_initializers.original.zero_point.name,
                            data_type=tensor_info.data_type,
                        )
                    else:
                        # Conversion case: a second Q/DQ pair converts the quantized type
                        # for the consumers listed in converted_recv_nodes.
                        assert tensor_info.data_type == tensor_qparam_initializers.original.scale.data_type
                        self._add_qdq_ops_for_converted_activation(
                            tensor_name,
                            tensor_qparam_initializers.original.scale.name,
                            tensor_qparam_initializers.original.zero_point.name,
                            tensor_info.data_type,
                            tensor_qparam_initializers.converted.scale.name,
                            tensor_qparam_initializers.converted.zero_point.name,
                            tensor_qparam_initializers.converted_recv_nodes,
                        )

                del self.tensors_to_quantize[tensor_name]

    def _quantize_sharing_param_tensors(self):
        """
        Adds Q/DQ ops to tensors that have been marked for quantization by op quantizers.
        Only operates on tensors that want to use the quantization parameter initializers from an upstream tensor.
        For example, a Transpose node's output tensor will typically want to use the same quantization parameter
        initializers as the Transpose node's input.
        """
        while self.tensors_to_quantize:
            for tensor_name, tensor_info in self.tensors_to_quantize.copy().items():
                quant_provider = tensor_info.quant_para_provider
                if quant_provider and quant_provider.input_name in self.quantized_value_map:
                    del self.tensors_to_quantize[tensor_name]

                    quantized_value = self.quantized_value_map[quant_provider.input_name].get_for_consumer(
                        quant_provider.node_name
                    )
                    if self.is_input_a_initializer(tensor_name):
                        raise ValueError("Quantization parameter shared mode is not supported for weight yet")

                    # Need to check if this tensor's quant_type is converted for some consumers.
                    # If so, create new scale/zp initializers for those consumers.
                    converted_qparam_inits = None
                    converted_recv_nodes = None
                    if tensor_name in self.quantization_params:
                        tensor_params = self.quantization_params[tensor_name]
                        if tensor_params.converted:
                            converted_qparam_inits = self._make_scale_zp_initializers(
                                tensor_name, tensor_params.converted, "_convert"
                            )
                            converted_recv_nodes = tensor_params.converted_recv_nodes

                    if converted_qparam_inits is None:
                        # Normal case: <producer> --> Q_shared --> DQ_shared --> <consumers>
                        self._add_qdq_pair_for_activation(
                            tensor_name, quantized_value.scale_name, quantized_value.zp_name
                        )
                    else:
                        # Conversion case: a second Q/DQ pair converts the shared quantized type.
                        self._add_qdq_ops_for_converted_activation(
                            tensor_name,
                            quantized_value.scale_name,
                            quantized_value.zp_name,
                            converted_qparam_inits.scale.data_type,
                            converted_qparam_inits.scale.name,
                            converted_qparam_inits.zero_point.name,
                            converted_recv_nodes,
                        )

    def _quantize_bias_tensors(self):
        """
        Adds DQ ops (or Cast) for bias tensors that have been marked for quantization by op quantizers.
        """
        for bias_name, bias_info in self.bias_to_quantize.items():
            if bias_name in self.quantized_value_map:
                continue

            # Quantize the bias statically and replace the float initializer.
            self.quantize_bias_static(bias_name, bias_info)
            init = find_by_name(bias_name, self.model.initializer())
            self.model.remove_initializer(init)
            quant_value = self.quantized_value_map[bias_name].original
            if quant_value.node_type == "Cast":
                # A simple Cast back to float16 (not DequantizeLinear); e.g., cublasLtMatmul
                # only supports (b)float16 and float bias.
                if not isinstance(init.data_type, int):
                    raise TypeError(f"Unexpected type {type(init.data_type)} for input={bias_info.input_name!r}")
                node_name = add_dequant_suffix(bias_name)
                dequant_node = onnx.helper.make_node(
                    "Cast",
                    [quant_value.q_name],
                    [bias_name],
                    name=node_name,
                    to=init.data_type,
                )
            elif quant_value.node_type in (None, "DequantizeLinear"):
                if quant_value.node_qtype in {
                    onnx.TensorProto.FLOAT16,
                    onnx.TensorProto.BFLOAT16,
                    onnx.TensorProto.FLOAT,
                }:
                    raise RuntimeError(f"Unexpected quantize type {quant_value.node_qtype} for DequantizeLinear.")
                inputs = [quant_value.q_name, quant_value.scale_name, quant_value.zp_name]
                node_name = add_dequant_suffix(bias_name)
                if quant_value.axis is not None:
                    dequant_node = onnx.helper.make_node(
                        "DequantizeLinear",
                        inputs,
                        [bias_name],
                        node_name,
                        axis=quant_value.axis,
                        domain=self.qdq_op_domain,
                    )
                else:
                    dequant_node = onnx.helper.make_node(
                        "DequantizeLinear",
                        inputs,
                        [bias_name],
                        node_name,
                        domain=self.qdq_op_domain,
                    )
            else:
                raise RuntimeError(f"Unexpected operator type {quant_value.node_type!r}.")
            self.model.add_node(dequant_node)

    def is_tensor_quantized(self, tensor_name: str):
        return tensor_name in self.tensors_to_quantize or tensor_name in self.bias_to_quantize

    def quantize_initializer(
        self,
        weight: onnx.TensorProto,
        qType: onnx.TensorProto.DataType,
        reduce_range: bool = False,
        keep_float_weight: bool = False,
    ) -> tuple[str, str, str]:
        """
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to keep the float weight. If keep_float_weight is False, the weight
                                  itself is quantized; otherwise only the scale and zero-point initializers are
                                  computed and the float weight is kept unchanged.
        :return: quantized weight name, zero point name, scale name
        """
        # Check if this weight was already quantized.
        if weight.name in self.quantized_value_map:
            quantized_value = self.quantized_value_map[weight.name].original
            return (
                quantized_value.q_name,
                quantized_value.zp_name,
                quantized_value.scale_name,
            )

        q_weight_name, zp_name, scale_name = self.quantize_initializer_impl(
            weight, qType, reduce_range, keep_float_weight
        )

        # Log entry for this quantized weight.
        quantized_value = QuantizedValue(
            weight.name,
            q_weight_name,
            scale_name,
            zp_name,
            QuantizedValueType.Initializer,
            None,
        )
        self.quantized_value_map[weight.name] = QDQTensorQuantizedValue(quantized_value, None, None)
        return q_weight_name, zp_name, scale_name

    def is_tensor_per_channel(
        self,
        tensor_name: str,
        default_axis: int,
        op_type: str | None = None,
    ) -> tuple[bool, int | None]:
        """
        Checks if a given tensor is configured to be quantized per-channel. If so, also returns the channel axis.

        ORT only supports per-channel quantization on static weights (i.e., ONNX initializers). If the user did not provide
        tensor quantization overrides for this tensor, then the value of self.per_channel determines if the weight
        is to be quantized per-channel.

        Params:
            tensor_name: The name of the tensor to check.
            default_axis: The default channel axis. This method checks if the normalized axis is within bounds.
                          Can be overridden via the extra_options 'QDQOpTypePerChannelSupportToAxis'
                          and 'TensorQuantOverrides'.
            op_type: Optional, defaults to None. The operator type that is the only consumer of this weight.
                     Used to access the extra option 'QDQOpTypePerChannelSupportToAxis'.
        Returns:
            A tuple (is_per_channel, axis) in which the first element indicates whether the tensor is
            quantized per-channel and the second element is the channel axis.
            The returned axis is only None if the tensor is not per-channel or the axis is out of bounds.
        """
        weight_initializer = self.initializers.get(tensor_name)
        if weight_initializer is None:
            return False, None  # Only support per-channel quantization on weights.

        if self.tensor_quant_overrides.has_per_tensor_overrides(tensor_name):
            return False, None  # User provided per-tensor overrides for this initializer.

        has_per_chan_overrides = self.tensor_quant_overrides.has_per_channel_overrides(tensor_name)
        if not self.per_channel and not has_per_chan_overrides:
            return False, None  # Global per_channel option is off and user did not provide per-channel overrides.

        axis = self.qdq_op_type_per_channel_support_to_axis.get(op_type, default_axis) if op_type else default_axis
        if has_per_chan_overrides:
            per_chan_overrides = self.tensor_quant_overrides.get_per_channel_overrides(tensor_name)
            axis = per_chan_overrides[0]["axis"]  # Prefer the axis from user-specified per-channel overrides.

        weight_nparray = tensor_proto_to_array(weight_initializer)
        weight_rank = len(weight_nparray.shape)
        axis_valid, axis = normalize_axis(axis, weight_rank)
        if not axis_valid:
            logging.warning(f"Axis {axis} is out-of-range for weight '{tensor_name}' with rank {weight_rank}")
            return False, None

        return True, axis

    def quantize_weight_per_channel(
        self,
        weight_name: str,
        weight_qType: onnx.TensorProto.DataType,
        channel_axis: int,
        reduce_range: bool = True,
        keep_float_weight: bool = False,
    ) -> tuple[str, str, str]:
        # Check if this weight was already quantized.
        if weight_name in self.quantized_value_map:
            quantized_value = self.quantized_value_map[weight_name].original
            return (
                quantized_value.q_name,
                quantized_value.zp_name,
                quantized_value.scale_name,
            )

        q_weight_name, zp_name, scale_name = self.quantize_weight_per_channel_impl(
            weight_name, weight_qType, channel_axis, reduce_range, keep_float_weight
        )
        quantized_value = QuantizedValue(
            weight_name,
            q_weight_name,
            scale_name,
            zp_name,
            QuantizedValueType.Initializer,
            None,
        )
        self.quantized_value_map[weight_name] = QDQTensorQuantizedValue(quantized_value, None, None)

        return q_weight_name, zp_name, scale_name

    def quantize_bias_static(self, bias_name: str, bias_info: QDQBiasQuantInfo) -> str:
        """
        Quantizes the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        """
        # Handle the case where the bias is already in the quantization map.
        if bias_name in self.quantized_value_map:
            return self.quantized_value_map[bias_name].original.q_name

        # Get the scale of the weight.
        weight_scale_name = self.quantized_value_map[bias_info.weight_name].original.scale_name
        weight_initializer = find_by_name(weight_scale_name, self.model.initializer())
        weight_scale = tensor_proto_to_array(weight_initializer)

        # Get the scale of the input.
        input_scale_name = (
            self.quantized_value_map[bias_info.input_name].get_for_consumer(bias_info.node_name).scale_name
        )
        inputscale_initializer = find_by_name(input_scale_name, self.model.initializer())
        input_scale = tensor_proto_to_array(inputscale_initializer)

        (
            quantized_bias_name,
            quantized_bias_scale_name,
            quantized_bias_zp_name,
            bias_scale_data,
            node_type,
            node_qtype,
        ) = self.quantize_bias_static_impl(bias_name, input_scale, weight_scale, bias_info.beta)

        quantized_value = QuantizedValue(
            bias_name,
            quantized_bias_name,
            quantized_bias_scale_name,
            quantized_bias_zp_name,
            QuantizedValueType.Initializer,
            0 if bias_scale_data.size > 1 else None,
            node_type=node_type,
            node_qtype=node_qtype,
        )
        self.quantized_value_map[bias_name] = QDQTensorQuantizedValue(quantized_value, None, None)

        return quantized_bias_name

    def _make_scale_zp_initializers(
        self, param_name: str, params: QuantizationParams, init_name_suffix: str = ""
    ) -> QDQScaleZpInitializers:
        """
        Creates and returns scale and zero-point initializers for the given quantization params. The initializers are
        named:
            - {param_name}_zero_point{init_name_suffix}
            - {param_name}_scale{init_name_suffix}
        """
        zero_point_values = np.array([params["zero_point"]])
        if not hasattr(params["scale"], "dtype") or params["scale"].dtype not in (np.float32, np.float16):
            raise ValueError(f"Unexpected type {type(params['scale'])} and param_name={param_name!r}")
        scale_values = np.array([params["scale"]])
        assert scale_values.dtype != np.float64
        zero_point_type = params.data.get("quant_type", self.activation_qType)

        zero_point_shape = []
        zero_point_name = param_name + "_zero_point" + init_name_suffix
        scale_shape = []
        scale_name = param_name + "_scale" + init_name_suffix

        # Add the zero-point initializer to the model.
        init_zp = onnx.helper.make_tensor(
            zero_point_name, zero_point_type, zero_point_shape, zero_point_values.ravel().tolist()
        )
        self.model.add_initializer(init_zp)

        if scale_values.dtype == np.float32:
            scale_type = onnx_proto.TensorProto.FLOAT
        elif scale_values.dtype == np.float16:
            scale_type = onnx_proto.TensorProto.FLOAT16
        else:
            raise ValueError(f"Unexpected dtype={scale_values.dtype} for param_name={param_name!r}")
        init_scale = onnx.helper.make_tensor(scale_name, scale_type, scale_shape, scale_values.reshape((-1,)).tolist())
        self.model.add_initializer(init_scale)

        return QDQScaleZpInitializers(init_scale, init_zp)

    def _make_tensor_scale_zp_initializers(self, tensor_name: str) -> QDQTensorScaleZpInitializers | None:
        """
        Creates and returns all scale/zero-point initializers for a given tensor. If the tensor is converted
        to a different quantization type, this function creates two pairs of zp/scale initializers. Otherwise,
        only one pair of zp/scale initializers is created.
        """
        if self.quantization_params is None or tensor_name not in self.quantization_params:
            logging.info(f'Quantization parameters for tensor:"{tensor_name}" not specified')
            return None

        tensor_params = self.quantization_params[tensor_name]
        if not isinstance(tensor_params, QDQTensorQuantParams):
            raise TypeError(f"Unexpected type {type(tensor_params)} for {tensor_name!r}.")

        original_inits = self._make_scale_zp_initializers(tensor_name, tensor_params.original)
        converted_inits = (
            self._make_scale_zp_initializers(tensor_name, tensor_params.converted, "_convert")
            if tensor_params.converted
            else None
        )

        return QDQTensorScaleZpInitializers(original_inits, converted_inits, tensor_params.converted_recv_nodes)

    def calc_quant_params(self, tensor_data: TensorData, quant_overrides: dict[str, Any]) -> QuantizationParams:
        """
        Calculates quantization parameters (scale/zero-point) given a tensor's min/max range and optional
        user-provided overrides.
        """
        quant_type = self.activation_qType
        if "quant_type" in quant_overrides:
            quant_type = quant_overrides["quant_type"].tensor_type

        if "scale" in quant_overrides and "zero_point" in quant_overrides:
            zero, scale = quant_overrides["zero_point"], quant_overrides["scale"]
        elif quant_type == onnx.TensorProto.FLOAT8E4M3FN:
            zero, scale = compute_scale_zp_float8(quant_type, tensor_data.avg_std[1])
        else:
            rmin = quant_overrides.get("rmin", tensor_data.range_value[0])
            rmax = quant_overrides.get("rmax", tensor_data.range_value[1])
            symmetric = quant_overrides.get("symmetric", self.is_activation_symmetric)
            reduce_range = quant_overrides.get("reduce_range", False)
            qmin, qmax = get_qmin_qmax_for_qType(quant_type, reduce_range=reduce_range, symmetric=symmetric)
            zero, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric, self.min_real_range)

        return QuantizationParams(zero_point=zero, scale=scale, quant_type=quant_type)

    def calc_graph_quant_params(self) -> dict[str, QDQTensorQuantParams]:
        """
        Calculates quantization parameters (scale/zero-point) for all tensors in the graph using each tensor's min/max range
        and optional user-provided overrides.
        """
        if self.tensors_range is None:
            return {}

        # Adjust tensor ranges before computing quantization parameters.
        self.adjust_tensor_ranges()

        quantization_params = {}
        for tensor_name in self.tensors_range:
            td = self.tensors_range[tensor_name]
            if not isinstance(td, TensorData):
                raise TypeError(f"Unexpected type {type(td)} for {tensor_name!r}.")

            quant_overrides = self.tensor_quant_overrides.get_per_tensor_overrides(tensor_name, default_val={})
            original = self.calc_quant_params(td, quant_overrides)
            converted = None
            converted_recv_nodes = None

            if "convert" in quant_overrides:
                converted = self.calc_quant_params(td, quant_overrides["convert"])
                converted_recv_nodes = quant_overrides["convert"].get("recv_nodes")

            quantization_params[tensor_name] = QDQTensorQuantParams(original, converted, converted_recv_nodes)

        return quantization_params
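
# A minimal sketch of how this class is typically driven. This is normally done for you by
# onnxruntime.quantization.quantize_static; the file name and calibration data below are
# hypothetical:
#
#   quantizer = QDQQuantizer(
#       model,                             # ONNXModel wrapper around the float model
#       per_channel=False,
#       reduce_range=False,
#       weight_qType=TensorProto.INT8,
#       activation_qType=TensorProto.UINT8,
#       tensors_range=tensors_range,       # dict of tensor name -> TensorData from calibration
#       nodes_to_quantize=[],
#       nodes_to_exclude=[],
#       op_types_to_quantize=["Conv", "MatMul"],
#       extra_options={},
#   )
#   onnx.save_model(quantizer.quantize_model(), "model.qdq.onnx")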