U
    U?hŠ<  ã                	   @   s¶  d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZmZ ddlZddlmZmZ ddlmZ dd	lmZmZmZmZmZmZ d
ZeeƒZ d(ee!ef ee!ef e
ee!  e"ddœdd„Z#d)e!ee
ee!  ee!e	ej$ f dœdd„Z%ed Z&ee!ee!eej$ f f e!e
eej$  e
eej$  ddœdd„Z'd*ee!eej$ f e
ee!eej$ f  ee!ee!eej$ f f dœdd„Z(ej$ej$ej$e)e
ej$ dœdd„Z*e!e!ee!ee!ej$f f dœdd„Z+eeej$ ej$f eeej$ ej$f e,dœd d!„Z-e-fee!ee!ej$f f eej$ej$ge,f ee!e,f d"œd#d$„Z.e-fee!ee!eej$ f f eeej$ eej$ ge,f ee!ee!e,f f d%œd&d'„Z/dS )+aÇ  Utilities to run a given ONNX model, while saving input/output tensors of
eligible operator nodes.

A use case is to debug quantization induced accuracy drop. An AI engineer can
run the original float32 model and the quantized model with the same inputs,
then compare the corresponding activations between the two models to find
where the divergence is.

Example Usage:

```python
    class ExampleDataReader(CalibrationDataReader):
        def __init__(self):
            ...
        def get_next(self):
            ...

    input_data_reader = ExampleDataReader()

    augmented_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_model.onnx"))
    modify_model_output_intermediate_tensors (path_to_onnx_model, augmented_model_path)

    tensor_dict = collect_activations(augmented_model_path, input_data_reader)
```

`tensor_dict` points to a dictionary where the keys are tensor names and each value
is a list of tensors, one from each model run

é    N)ÚPath)ÚCallableÚDictÚListÚOptionalÚSequenceÚUnion)ÚhelperÚnumpy_helperé   )ÚCalibraterBaseÚCalibrationDataReader)Ú	ONNXModel)ÚDEQUANT_OP_NAMEÚDEQUANT_OUTPUT_SUFFIXÚQUANT_INPUT_SUFFIXÚTENSOR_NAME_QUANT_SUFFIXÚfind_by_nameÚload_model_with_shape_inferZ_ReshapedSavedOutputF)Úinput_model_pathÚoutput_model_pathÚop_types_for_savingÚsave_as_external_dataÚreturnc                 C   sÚ   |dkrg }t | |d}|j}| |¡\}}dtt ¡ ƒ }t tjdgtj	d|¡}	|j
j |	¡ |D ]\}
|
t }tjjd|
|g|g|d}|j
j |¡ t |||
 jjjdg¡}|j
j |¡ qhtj|||d dS )	aà  Augment a given ONNX model to save node input/output tensors.

    Add all input/output tensors of operator nodes to model outputs
    so that their values can be retrieved for debugging purposes.

    Args:
        input_model: the path to load the model.
        op_types_for_saving: Operator types for which the
                input/output should be saved. By default, saving all the
                float32/float16 tensors.

    Returns:
        The augmented ONNX model
    N)Zop_types_to_calibrateZLinearReshape_éÿÿÿÿ©ZdtypeZReshape)ÚinputsÚoutputsÚname)r   )r   ÚmodelZselect_tensors_to_calibrateÚstrÚtimer
   Z
from_arrayÚnumpyÚarrayZint64ÚgraphÚinitializerÚappendÚ_TENSOR_SAVE_POSTFIXÚonnxr	   Z	make_nodeÚnodeZmake_tensor_value_infoÚtypeZtensor_typeZ	elem_typeÚoutputÚsave)r   r   r   r   ZsaverZmodel_to_augmentÚtensorsZvalue_infosZreshape_shape_nameZreshape_shapeÚtensor_nameZreshape_outputZreshape_nodeZreshape_output_value_info© r/   úY/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/quantization/qdq_loss_debug.pyÚ(modify_model_output_intermediate_tensors@   s8    ü  ÿýr1   )Úaugmented_modelÚinput_readerÚexecution_providersr   c                 C   sÀ   |dkrt  ¡ }t jj|_|dkr(dg}t j| ||d}g }|D ]}| | d|¡¡ q@|sdtdƒ‚i }| 	¡ }|D ]F}	t
||	ƒD ]6\}
}|
j t¡r‚|
jdt … }| |g ¡ |¡ q‚qt|S )a´  Run augmented model and collect activations tensors.

    Args:
        augmented_model: Path to augmented model created by modify_model_output_intermediate_tensors ()
        input_reader: Logic for reading input for the model, augmented model have the same
            input with the original model.
        session_options: Optional OnnxRuntime session options for controlling model run.
            By default graph optimization is turned off
        execution_providers: Collection of execution providers for running the model.
            Only CPU EP is used by default.

    Returns:
        A dictionary where the key is tensor name and values are list of tensors from each batch
    NZCPUExecutionProvider)Zsess_optionsÚ	providersz3No data is collected while running augmented model!)ÚonnxruntimeZSessionOptionsZGraphOptimizationLevelZORT_DISABLE_ALLZgraph_optimization_levelZInferenceSessionr&   ÚrunÚRuntimeErrorÚget_outputsÚzipr   Úendswithr'   Ú_TENSOR_SAVE_POSTFIX_LENÚ
setdefault)r2   r3   Zsession_optionsr4   Zinference_sessionZintermediate_outputsZinput_dZoutput_dictZoutput_infoÚbatchr+   Zoutput_dataZoutput_namer/   r/   r0   Úcollect_activationss   s.    
ýr?   Z_1)Úqdq_cmpÚactivation_nameÚpre_qdq_tensorsÚpost_qdq_tensorsr   c                 C   s4   |d k	r0|d k	r0i | |< || | d< || | d< d S )NÚpre_qdqÚpost_qdqr/   )r@   rA   rB   rC   r/   r/   r0   Ú_add_pre_post_qdq_pair¨   s    rF   )Úqdq_activationsÚfloat_activationsr   c                 C   sú   i }|   ¡ D ]´\}}| t¡rN|dttƒ … }|  |¡}|}t||||ƒ q| t¡rˆ|dttƒ … }|  |¡}|}t||||ƒ q| t¡r|dttƒ … }|  |¡}|}t||||ƒ q|sÊ|S |  ¡ D ]"\}}	| |¡}
|
dk	rÒ|
|	d< qÒ|S )a©  Comparing activation values to help debugging accuracy loss due to quantization.

    This functions takes saved activations from the QDQ model and (optionally) the
    float point model, and provides a data structure for comparing:
        * from the qdq model, activation values before and after QDQ operation
        * across both models, activations from the orignal model vs the corresponding
          activations in the QDQ model

    Arg:
        qdq_activations: Output of `collect_activations`. This must be from a quantized
            model with QDQ format.
        float_activations: Output of `collect_activations`. This must be from the float
            point model.

    Returns:
        Dict for comparing pre and post quantized activation tensors. E.g.
        ```
        qdq_cmp = cmp_qdq_input_output(qdq_activations)
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])


        qdq_cmp = cmp_qdq_input_output(qdq_activations, float_activations)
        print(qdq_cmp['activation1']['float'][0])
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])
        ```
    NÚfloat)Úitemsr;   r   ÚlenÚgetrF   r   Ú_POST_QDQ_POSTFIX1)rG   rH   r@   r.   r-   Zpre_namerC   rB   Zact_nameZ
act_valuesZ
float_actsr/   r/   r0   Úcreate_activation_matching´   s0    !







rN   )Úweight_tensorÚweight_scaleÚ	weight_zpÚchannel_axisr   c                 C   sØ   |j |j kst‚|jdkr&| | | S |jdks4t‚t| j ƒ}d||< | j | }d }t|ƒD ]^}|  ||¡}|||  ||  }	|dkršt |	¡ 	|¡}q\t |	¡ 	|¡}
t 
||
f|¡}q\|d krÈd S | 	| j ¡ |S )Nr   r   )ÚshapeÚAssertionErrorÚsizeÚndimÚlistÚrangeZtaker"   ZasarrayÚreshapeÚconcatenate)rO   rP   rQ   rR   Zreshape_dimsZchannel_countZdequantized_weightsÚiZper_channel_dataZdequantized_per_channel_dataZchannel_weightsr/   r/   r0   Ú_run_dequantize_linearò   s&    


r\   )Úfloat_model_pathÚqdq_model_pathr   c                 C   sì  t tt| ƒƒƒ}t tt|ƒƒƒ}i }| ¡ }| ¡ D ]°}|jtkrFq4|jd }t||ƒ}|s`q4| 	t
¡s„t d|› d|› d¡ q4d}	|jD ]}
|
jdkrŽ|
j}	qŽt |¡}t t|jd |ƒ¡}t|jƒdkrêt t|jd |ƒ¡}ntj|jtjd	}|j|j  krdkr4n n| tƒ ¡}| tƒ ¡}|j|jkrZtd
|j› d|j› ƒ‚t||||	d}|dtt
ƒ … }|dkr t d|› d|› d¡ q4t|| ¡ ƒ}|sÎt d| › d|› d¡ q4t |¡}||dœ||< q4|S )aˆ  Comparing weight values to help debugging accuracy loss due to quantization.

    This functions takes the float model and the qdq model, and provides a data structure for comparing
    their corresponding weights to locate quantization errors

    Arg:
        float_model_path: Path points to the float point model.
        qdq_model_path: Path points to the qdq model.

    Returns:
        Dict for comparing weight tensors. E.g.
        ```
        qdq_weight_cmp = create_weight_matching(float_model, qdq_model)
        print(qdq_weight_cmp['activation1']['float'])
        print(qdq_weight_cmp['activation1']['dequantized'])
        ```
    r   zModel Error in 'z': Dequantized tensor name 'z' not recognized!r   Úaxisr   é   r   z2scale and zero_point must have the same shape but z != )rR   Nz': 'z'' per-channel quantization on 0 channelz': weight tensor 'z' not found!)rI   Údequantized)r   r   r   r%   ZnodesZop_typer   Úinputr   r;   r   ÚloggingÚerrorÚ	attributer   r[   r
   Zto_arrayrK   r"   ZzerosrS   Zint32rU   rY   Útupler8   r\   )r]   r^   Zfloat_onnx_modelZqdq_onnx_modelZmatched_weightsZinitializersr)   Úweight_nameZweight_valuesr_   ÚattrrO   rP   rQ   Zweight_quantZfloat_valuesZweight_floatr/   r/   r0   Úcreate_weight_matching  sT    





 ÿ

ri   )ÚxÚyr   c           
      C   s®   t | tjƒr| g}n| }t |tjƒr,|g}n|}t|ƒt|ƒkrHtdƒ‚t |¡ ¡ }t |¡ ¡ }t d¡j}t	tj
 |¡|ƒ}t	tj
 || ¡|ƒ}|| }	dt |	¡ S )Nz%Unequal number of tensors to compare!rI   é   )Ú
isinstancer"   ÚndarrayrK   r8   rZ   ÚflattenZfinfoÚepsÚmaxZlinalgZnormÚmathÚlog10)
rj   rk   ZxlistZylistÚleftÚrightÚepsilonZtensor_normZ	diff_normÚresr/   r/   r0   Ú*compute_signal_to_quantization_noice_ratioU  s    rx   )Úweights_matchÚerr_funcr   c                 C   s0   i }|   ¡ D ]\}}||d |d ƒ||< q|S )NrI   ra   ©rJ   )ry   rz   Úresultrg   Zweight_matchr/   r/   r0   Úcompute_weight_errorm  s    r}   )Úactivations_matchrz   r   c                 C   sZ   i }|   ¡ D ]H\}}i }||d |d ƒ|d< |d }|rL|||d ƒ|d< |||< q|S )NrD   rE   Zqdq_errrI   Z
xmodel_errr{   )r~   rz   r|   r   ÚmatchZ
err_resultZfloat_activationr/   r/   r0   Úcompute_activation_errorw  s    
r€   )NF)NN)N)0Ú__doc__rc   rr   r!   Úpathlibr   Útypingr   r   r   r   r   r   r"   r(   r	   r
   r6   Z	calibrater   r   Z
onnx_modelr   Zquant_utilsr   r   r   r   r   r   r'   rK   r<   r    Úboolr1   rn   r?   rM   rF   rN   Úintr\   ri   rI   rx   r}   r€   r/   r/   r/   r0   Ú<module>   sŠ     	  ü


û6  ü
û2û þý?   þ$H þþ
ýüÿû