U
    U?h<                  	   @   s  d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZmZ ddlZddlmZmZ ddlmZ dd	lmZmZmZmZmZmZ d
ZeeZ d(ee!ef ee!ef e
ee!  e"ddddZ#d)e!ee
ee!  ee!e	ej$ f dddZ%ed Z&ee!ee!eej$ f f e!e
eej$  e
eej$  ddddZ'd*ee!eej$ f e
ee!eej$ f  ee!ee!eej$ f f dddZ(ej$ej$ej$e)e
ej$ dddZ*e!e!ee!ee!ej$f f dddZ+eeej$ ej$f eeej$ ej$f e,dd d!Z-e-fee!ee!ej$f f eej$ej$ge,f ee!e,f d"d#d$Z.e-fee!ee!eej$ f f eeej$ eej$ ge,f ee!ee!e,f f d%d&d'Z/dS )+a  Utilities to run a given ONNX model, while saving input/output tensors of
eligible operator nodes.

A use case is to debug quantization induced accuracy drop. An AI engineer can
run the original float32 model and the quantized model with the same inputs,
then compare the corresponding activations between the two models to find
where the divergence is.

Example Usage:

```python
    class ExampleDataReader(CalibrationDataReader):
        def __init__(self):
            ...
        def get_next(self):
            ...

    input_data_reader = ExampleDataReader()

    augmented_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_model.onnx"))
    modify_model_output_intermediate_tensors (path_to_onnx_model, augmented_model_path)

    tensor_dict = collect_activations(augmented_model_path, input_data_reader)
```

`tensor_dict` points to a dictionary where the keys are tensor names and each value
is a list of tensors, one from each model run

    N)Path)CallableDictListOptionalSequenceUnion)helpernumpy_helper   )CalibraterBaseCalibrationDataReader)	ONNXModel)DEQUANT_OP_NAMEDEQUANT_OUTPUT_SUFFIXQUANT_INPUT_SUFFIXTENSOR_NAME_QUANT_SUFFIXfind_by_nameload_model_with_shape_inferZ_ReshapedSavedOutputF)input_model_pathoutput_model_pathop_types_for_savingsave_as_external_datareturnc                 C   s   |dkrg }t | |d}|j}||\}}dtt  }ttjdgtj	d|}	|j
j|	 |D ]\}
|
t }tjjd|
|g|g|d}|j
j| t|||
 jjjdg}|j
j| qhtj|||d dS )	a  Augment a given ONNX model to save node input/output tensors.

    Add all input/output tensors of operator nodes to model outputs
    so that their values can be retrieved for debugging purposes.

    Args:
        input_model: the path to load the model.
        op_types_for_saving: Operator types for which the
                input/output should be saved. By default, saving all the
                float32/float16 tensors.

    Returns:
        The augmented ONNX model
    N)Zop_types_to_calibrateZLinearReshape_ZdtypeZReshape)inputsoutputsname)r   )r   modelZselect_tensors_to_calibratestrtimer
   Z
from_arraynumpyarrayZint64graphinitializerappend_TENSOR_SAVE_POSTFIXonnxr	   Z	make_nodenodeZmake_tensor_value_infotypeZtensor_typeZ	elem_typeoutputsave)r   r   r   r   ZsaverZmodel_to_augmenttensorsZvalue_infosZreshape_shape_nameZreshape_shapetensor_nameZreshape_outputZreshape_nodeZreshape_output_value_info r/   Y/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/quantization/qdq_loss_debug.py(modify_model_output_intermediate_tensors@   s8      r1   )augmented_modelinput_readerexecution_providersr   c                 C   s   |dkrt  }t jj|_|dkr(dg}t j| ||d}g }|D ]}||d| q@|sdtdi }|	 }|D ]F}	t
||	D ]6\}
}|
jtr|
jdt  }||g | qqt|S )a  Run augmented model and collect activations tensors.

    Args:
        augmented_model: Path to augmented model created by modify_model_output_intermediate_tensors ()
        input_reader: Logic for reading input for the model, augmented model have the same
            input with the original model.
        session_options: Optional OnnxRuntime session options for controlling model run.
            By default graph optimization is turned off
        execution_providers: Collection of execution providers for running the model.
            Only CPU EP is used by default.

    Returns:
        A dictionary where the key is tensor name and values are list of tensors from each batch
    NZCPUExecutionProvider)Zsess_options	providersz3No data is collected while running augmented model!)onnxruntimeZSessionOptionsZGraphOptimizationLevelZORT_DISABLE_ALLZgraph_optimization_levelZInferenceSessionr&   runRuntimeErrorget_outputszipr   endswithr'   _TENSOR_SAVE_POSTFIX_LEN
setdefault)r2   r3   Zsession_optionsr4   Zinference_sessionZintermediate_outputsZinput_dZoutput_dictZoutput_infobatchr+   Zoutput_dataZoutput_namer/   r/   r0   collect_activationss   s.    
r?   Z_1)qdq_cmpactivation_namepre_qdq_tensorspost_qdq_tensorsr   c                 C   s4   |d k	r0|d k	r0i | |< || | d< || | d< d S )Npre_qdqpost_qdqr/   )r@   rA   rB   rC   r/   r/   r0   _add_pre_post_qdq_pair   s    rF   )qdq_activationsfloat_activationsr   c                 C   s   i }|   D ]\}}|trN|dtt  }| |}|}t|||| q|tr|dtt  }| |}|}t|||| q|tr|dtt  }| |}|}t|||| q|s|S |  D ]"\}}	||}
|
dk	r|
|	d< q|S )a  Comparing activation values to help debugging accuracy loss due to quantization.

    This functions takes saved activations from the QDQ model and (optionally) the
    float point model, and provides a data structure for comparing:
        * from the qdq model, activation values before and after QDQ operation
        * across both models, activations from the orignal model vs the corresponding
          activations in the QDQ model

    Arg:
        qdq_activations: Output of `collect_activations`. This must be from a quantized
            model with QDQ format.
        float_activations: Output of `collect_activations`. This must be from the float
            point model.

    Returns:
        Dict for comparing pre and post quantized activation tensors. E.g.
        ```
        qdq_cmp = cmp_qdq_input_output(qdq_activations)
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])


        qdq_cmp = cmp_qdq_input_output(qdq_activations, float_activations)
        print(qdq_cmp['activation1']['float'][0])
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])
        ```
    Nfloat)itemsr;   r   lengetrF   r   _POST_QDQ_POSTFIX1)rG   rH   r@   r.   r-   Zpre_namerC   rB   Zact_nameZ
act_valuesZ
float_actsr/   r/   r0   create_activation_matching   s0    !







rN   )weight_tensorweight_scale	weight_zpchannel_axisr   c                 C   s   |j |j kst|jdkr&| | | S |jdks4tt| j }d||< | j | }d }t|D ]^}| ||}|||  ||  }	|dkrt|		|}q\t|		|}
t
||
f|}q\|d krd S |	| j  |S )Nr   r   )shapeAssertionErrorsizendimlistrangeZtaker"   Zasarrayreshapeconcatenate)rO   rP   rQ   rR   Zreshape_dimsZchannel_countZdequantized_weightsiZper_channel_dataZdequantized_per_channel_dataZchannel_weightsr/   r/   r0   _run_dequantize_linear   s&    


r\   )float_model_pathqdq_model_pathr   c                 C   s  t tt| }t tt|}i }| }| D ]}|jtkrFq4|jd }t||}|s`q4|	t
std| d| d q4d}	|jD ]}
|
jdkr|
j}	qt|}tt|jd |}t|jdkrtt|jd |}ntj|jtjd	}|j|j  krdkr4n n|t }|t }|j|jkrZtd
|j d|j t||||	d}|dtt
  }|dkrtd| d| d q4t|| }|std|  d| d q4t|}||d||< q4|S )a  Comparing weight values to help debugging accuracy loss due to quantization.

    This functions takes the float model and the qdq model, and provides a data structure for comparing
    their corresponding weights to locate quantization errors

    Arg:
        float_model_path: Path points to the float point model.
        qdq_model_path: Path points to the qdq model.

    Returns:
        Dict for comparing weight tensors. E.g.
        ```
        qdq_weight_cmp = create_weight_matching(float_model, qdq_model)
        print(qdq_weight_cmp['activation1']['float'])
        print(qdq_weight_cmp['activation1']['dequantized'])
        ```
    r   zModel Error in 'z': Dequantized tensor name 'z' not recognized!r   axisr      r   z2scale and zero_point must have the same shape but z != )rR   Nz': 'z'' per-channel quantization on 0 channelz': weight tensor 'z' not found!)rI   dequantized)r   r   r   r%   ZnodesZop_typer   inputr   r;   r   loggingerror	attributer   r[   r
   Zto_arrayrK   r"   ZzerosrS   Zint32rU   rY   tupler8   r\   )r]   r^   Zfloat_onnx_modelZqdq_onnx_modelZmatched_weightsZinitializersr)   weight_nameZweight_valuesr_   attrrO   rP   rQ   Zweight_quantZfloat_valuesZweight_floatr/   r/   r0   create_weight_matching  sT    






 

ri   )xyr   c           
      C   s   t | tjr| g}n| }t |tjr,|g}n|}t|t|krHtdt| }t| }tdj}t	tj
||}t	tj
|| |}|| }	dt|	 S )Nz%Unequal number of tensors to compare!rI      )
isinstancer"   ndarrayrK   r8   rZ   flattenZfinfoepsmaxZlinalgZnormmathlog10)
rj   rk   ZxlistZylistleftrightepsilonZtensor_normZ	diff_normresr/   r/   r0   *compute_signal_to_quantization_noice_ratioU  s    rx   )weights_matcherr_funcr   c                 C   s0   i }|   D ]\}}||d |d ||< q|S )NrI   ra   rJ   )ry   rz   resultrg   Zweight_matchr/   r/   r0   compute_weight_errorm  s    r}   )activations_matchrz   r   c                 C   sZ   i }|   D ]H\}}i }||d |d |d< |d }|rL|||d |d< |||< q|S )NrD   rE   Zqdq_errrI   Z
xmodel_errr{   )r~   rz   r|   r   matchZ
err_resultZfloat_activationr/   r/   r0   compute_activation_errorw  s    
r   )NF)NN)N)0__doc__rc   rr   r!   pathlibr   typingr   r   r   r   r   r   r"   r(   r	   r
   r6   Z	calibrater   r   Z
onnx_modelr   Zquant_utilsr   r   r   r   r   r   r'   rK   r<   r    boolr1   rn   r?   rM   rF   rN   intr\   ri   rI   rx   r}   r   r/   r/   r/   r0   <module>   s     	  


6  
2 ?   $H 
