U
    U?hP                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlmZmZmZmZ d dlZddlmZmZmZ ejejejdd	d
Zd,ejejee eejdddZG dd dZ G dd dZ!G dd deZ"G dd de j#dZ$G dd dZ%G dd de%Z&G dd de%Z'G dd de'Z(G dd  d e'Z)G d!d" d"e'Z*G d#d$ d$e j#dZ+G d%d& d&e+Z,dd'e"j-d(i fee.ef eee.  d)d*d+Z/dS )-    N)Enum)Path)DictOptionalSequenceTupleUnion)
ModelProtoTensorProtohelpernumpy_helper   )
apply_plotload_model_with_shape_infersmooth_distribution)pkqkreturnc                 C   s|   t j| j| jd}| dd t | dd |dd   |dd< | dk|dk@ }d||< | dk|dk@ }t j|| < |S )z
    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.rel_entr.html#scipy.special.rel_entr.
    Python implementation.
    dtypeNr   )npemptyshaper   loginf)r   r   resc2c1 r   T/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/quantization/calibrate.pyrel_entr   s    2r    )r   r   baseaxisr   c                 C   s   |dks|dkst d|dk	s(t dt| tj} d|  tj| |dd } t|tj}t| |\} }d| tj||dd }t| |}tj||d}|dk	r|t| }|| j	S )	z
    Simplifeied version of entropy.
    Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html.
    This avoids taking a dependency on scipy just for this function.
    Nr   z0base={base} must be a positive number or `None`.z
qk is None      ?T)r"   keepdimsr"   )
AssertionErrorr   asarrayastypefloat32sumZbroadcast_arraysr    r   r   )r   r   r!   r"   Zvecsr   r   r   entropy'   s    
r,   c                   @   sT   e Zd ZedddddddgZedddddgZdd	 Zed
d Zedd Z	dS )
TensorDataavgstdlowesthighesthist
hist_edgesbinsc                 K   s   |  D ]\}}|tjkr2td|dtj d|tjkrt|ds^tdt| d||jtj	tj
fkrtd|j d|t| || qd S )NzUnexpected value z not in .r   Unexpected type z for k=zUnexpected dtype )itemsr-   _allowed
ValueError_floatshasattrtyper   r   Zfloat16r)   setattr)selfkwargskvr   r   r   __init__G   s    


zTensorData.__init__c                 C   s4   t | drt | ds(tdt|  d| j| jfS )Nr0   r1   z0Attributes 'lowest' and/or 'highest' missing in r5   )r;   AttributeErrordirr0   r1   r>   r   r   r   range_valueR   s    zTensorData.range_valuec                 C   s4   t | drt | ds(tdt|  d| j| jfS )Nr.   r/   z)Attributes 'avg' and/or 'std' missing in r5   )r;   rC   rD   r.   r/   rE   r   r   r   avg_stdX   s    zTensorData.avg_stdN)
__name__
__module____qualname__	frozensetr8   r:   rB   propertyrF   rG   r   r   r   r   r-   C   s   
r-   c                   @   sZ   e Zd Zeeeeef f dddZdd Z	dd Z
dd	 Zd
d Zdd Zdd ZdS )TensorsDatadatac              	   C   s   || _ i | _| D ]\}}t|ts:tdt| dt|tr|tj	krvt
|dkrvt|d |d d| j|< qt
|dkrt|d |d |d |d d	| j|< qtd
|ddt
| d| dt|tstdt| d|| j|< qd S )NzKeys must be strings not r5      r   r   r0   r1         )r0   r1   r2   r4   zUnexpected tuple for rz	, it has z elements: zValues must be TensorData not )calibration_methodrO   r7   
isinstancestr	TypeErrorr<   tupleCalibrationMethodMinMaxlenr-   )r>   rU   rO   r@   rA   r   r   r   rB   `   s     

&"
zTensorsData.__init__c                 c   s   | j E d H  d S NrN   rE   r   r   r   __iter__r   s    zTensorsData.__iter__c                 C   s
   || j kS r]   rN   r>   keyr   r   r   __contains__u   s    zTensorsData.__contains__c                 C   s
   | j | S r]   rN   r_   r   r   r   __getitem__x   s    zTensorsData.__getitem__c                 C   s(   || j krtd|d|| j |< d S )Nz)Only an existing tensor can be modified, z is not.)rO   RuntimeError)r>   r`   valuer   r   r   __setitem__{   s    
zTensorsData.__setitem__c                 C   s
   | j  S r]   )rO   valuesrE   r   r   r   rf      s    zTensorsData.valuesc                 C   s
   | j  S r]   )rO   r7   rE   r   r   r   r7      s    zTensorsData.itemsN)rH   rI   rJ   r   rW   r   r-   r   rB   r^   ra   rb   re   rf   r7   r   r   r   r   rM   _   s   rM   c                   @   s   e Zd ZdZdZdZdZdS )rZ   r   r   rP   rS   N)rH   rI   rJ   r[   Entropy
PercentileDistributionr   r   r   r   rZ      s   rZ   c                   @   sT   e Zd Zedd ZejedddZdd Z	dd	 Z
d
d ZeedddZdS )CalibrationDataReaderc                 C   s   t |drt|jptS )Nget_next)r;   callablerk   NotImplemented)clssubclassr   r   r   __subclasshook__   s    z&CalibrationDataReader.__subclasshook__r   c                 C   s   t dS )z9generate the input data dict for ONNXinferenceSession runNNotImplementedErrorrE   r   r   r   rk      s    zCalibrationDataReader.get_nextc                 C   s   | S r]   r   rE   r   r   r   r^      s    zCalibrationDataReader.__iter__c                 C   s   |   }|d krt|S r]   )rk   StopIteration)r>   resultr   r   r   __next__   s    zCalibrationDataReader.__next__c                 C   s   t d S r]   rr   rE   r   r   r   __len__   s    zCalibrationDataReader.__len__)start_index	end_indexc                 C   s   t d S r]   rr   )r>   rx   ry   r   r   r   	set_range   s    zCalibrationDataReader.set_rangeN)rH   rI   rJ   classmethodrp   abcabstractmethoddictrk   r^   rv   rw   intrz   r   r   r   r   rj      s   
rj   )	metaclassc                   @   s~   e Zd Zdeeef eee  dddZdgfdd	Z	d
d Z
edddZdd Zdd ZedddZedddZdS )CalibraterBaseNaugmented_model.onnxF
model_pathop_types_to_calibratec                 C   sn   t |trtt|| _nt |tr0t|| _ntd|| _|| _|| _|| _	|| _
d| _d| _dg| _dS )a  
        :param model_path: ONNX model to calibrate. It should be a model file path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb.
        :param per_channel: whether to compute ranges per each channel.
        z model_path should be model path.NCPUExecutionProvider)rV   rW   r   r   modelr9   r   augmented_model_path	symmetricuse_external_data_formatper_channelZaugment_modelinfer_sessionexecution_providers)r>   r   r   r   r   r   r   r   r   r   rB      s    

zCalibraterBase.__init__r   c                 C   s   || _ |   dS )zz
        reset the execution providers to execute the collect_data. It triggers to re-creating inference session.
        N)r   create_inference_session)r>   r   r   r   r   set_execution_providers   s    z&CalibraterBase.set_execution_providersc                 C   s,   t  }t jj|_t j| j|| jd| _dS )z9
        create an OnnxRuntime InferenceSession.
        )sess_options	providersN)	onnxruntimeZSessionOptionsZGraphOptimizationLevelZORT_DISABLE_ALLZgraph_optimization_levelZInferenceSessionr   r   r   )r>   r   r   r   r   r      s    
z'CalibraterBase.create_inference_sessionr   c           	      C   s   dd |j jD }|dd |j jD  |dd |j jD  dd |j jD }t }tjtj	h}|j j
D ]h}| jr|j| jkrnt|j|jD ]@}||kr|| }|jdr|jjj|kr||kr|| qqn||fS )z
        select input/output tensors of candidate nodes to calibrate.
        returns:
            tensors (set): set of tensor name.
            value_infos (dict): tensor name to value info.
        c                 S   s   i | ]}|j |qS r   name.0vir   r   r   
<dictcomp>   s      z>CalibraterBase.select_tensors_to_calibrate.<locals>.<dictcomp>c                 S   s   i | ]}|j |qS r   r   )r   Zotr   r   r   r      s      c                 S   s   i | ]}|j |qS r   r   )r   itr   r   r   r      s      c                 S   s   h | ]
}|j qS r   r   )r   initr   r   r   	<setcomp>   s     z=CalibraterBase.select_tensors_to_calibrate.<locals>.<setcomp>tensor_type)graph
value_infoupdateoutputinputinitializersetr
   FLOATZFLOAT16noder   op_type	itertoolschainr<   ZHasFieldr   	elem_typeadd)	r>   r   value_infosr   tensors_to_calibrateZtensor_type_to_calibrater   tensor_namer   r   r   r   select_tensors_to_calibrate   s&    
z*CalibraterBase.select_tensors_to_calibratec                 C   s   | j S )zP
        return: augmented onnx model. Call after calling augment_graph
        r   rE   r   r   r   get_augment_model   s    z CalibraterBase.get_augment_modelc                 C   s   t dS )z
        abstract method: augment the input model to prepare for collecting data. It will:
            1. augment the model to be able to collect desired statistics data
            2. save augmented model to augmented_model_paths
        Nrr   rE   r   r   r   augment_graph  s    zCalibraterBase.augment_graphdata_readerc                 C   s   t dS )z
        abstract method: collect the tensors that will be used for range computation. It can be called multiple times.
        Nrr   )r>   r   r   r   r   collect_data	  s    zCalibraterBase.collect_datarq   c                 C   s   t dS )ze
        abstract method: compute data based on the calibration method stored in TensorsData
        Nrr   rE   r   r   r   compute_data  s    zCalibraterBase.compute_data)Nr   FFF)rH   rI   rJ   r   rW   r   r   r   rB   r   r   r	   r   r   r   rj   r   rM   r   r   r   r   r   r      s         

"r   c                       sj   e Zd Zdeeef eee  d fddZdd	 Z	d
d Z
edddZdd ZedddZ  ZS )MinMaxCalibraterNr   F{Gz?r   c
           
         s|   t  j||||||	d g | _d| _t| jjj| _dd | jjjD | _	|| _
|rl|dk sd|dkrltd|| _|| _dS )aw  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
        :param averaging_constant: constant smoothing factor to use when computing the moving average.
        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
        :param per_channel: whether to compute ranges per each channel.
        )r   r   r   r   r   Nc                 S   s   h | ]
}|j qS r   r   r   r   r   r   r   r   9  s     z,MinMaxCalibrater.__init__.<locals>.<setcomp>r   r   z;Invalid averaging constant, which should not be < 0 or > 1.)superrB   intermediate_outputscalibrate_tensors_ranger\   r   r   r   num_model_outputsmodel_original_outputsmoving_averager9   averaging_constantmax_intermediate_outputs)
r>   r   r   r   r   r   r   r   r   r   	__class__r   r   rB     s"    zMinMaxCalibrater.__init__c                    s    j\}}tt ttjdgtj	d}jj
j| dd   fdd}|D ]}||d ||d qbtjjjjd	 d
S )z
        Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
        model and ensures their outputs are stored as part of the graph output
        :return: augmented ONNX model
        r   c                 S   s:   |j D ]}tj| |jr|j  S qtd|  dd S )Nz&Model does not contain a version for 'z'.)opset_importonnxZdefshasdomainversionrc   )r   r   r   r   r   r   get_op_versionK  s    
z6MinMaxCalibrater.augment_graph.<locals>.get_op_versionc                    s  d}| d | }|d }t jj|| g|g||d}t jjd|g|g|d}dd jjjD }|d	d jjjD  |d
d jjjD  | |kr||  j	j
j}ntd| djrNt||  j	j
jj}	dtd|	}
 |jdk r|jtd|
 n@tt }ttj|
tjd|}|j| jjj| jjj||g jjjt ||d g d S )Nr   _Z_Reshape)r$   r   ZReshape)inputsoutputsr   c                 S   s   i | ]}|j |qS r   r   r   r   r   r   r   d  s      zNMinMaxCalibrater.augment_graph.<locals>.add_reduce_min_max.<locals>.<dictcomp>c                 S   s   i | ]}|j |qS r   r   )r   or   r   r   r   e  s      c                 S   s   i | ]}|j |qS r   r   r   ir   r   r   r   f  s      z'Unable to guess tensor type for tensor zE, running shape inference before quantization may resolve this issue.r   rP      Zaxesr   )r   )!r   r   Z	make_noder   r   r   r   r   r   r<   r   r   r9   r   r\   r   dimrange	attributeappendZmake_attributerW   uuiduuid4r   
from_arrayr   arrayint64r   r   extendZmake_tensor_value_info)r   Zreduce_op_namer$   Zreduce_outputintermediate_outputZreduce_nodeZreshape_noder   Z	onnx_typeZtensor_rankZreduced_axesZreduce_axes_nameZreduce_axesr   Zreshape_shape_namer>   r   r   add_reduce_min_maxQ  sF        
z:MinMaxCalibrater.augment_graph.<locals>.add_reduce_min_maxZ	ReduceMinZ	ReduceMaxZsave_as_external_dataN)r   r   rW   r   r   r   r   r   r   r   r   r   r   r   saver   r   )r>   Ztensorsr   Zreshape_shaper   tensorr   r   r   r   @  s    .
zMinMaxCalibrater.augment_graphc                 C   s
   g | _ d S r]   r   rE   r   r   r   clear_collected_data  s    z%MinMaxCalibrater.clear_collected_datar   c                 C   s   |  }|sqH| j| jd | | jd k	r t| j| jkr |   q t| jdkrh| jd krht	d| 
 }t|tstdt| d|   d S )Nr   No data is collected.z+compute_data must return a TensorsData not r5   )rk   r   r   r   runr   r\   r   r   r9   r   rV   rM   rX   r<   )r>   r   r   tr   r   r   r     s    

zMinMaxCalibrater.collect_datac                 C   s   |s|S |  D ]\}}t|tr8|jd }|jd }n|\}}t|| trl|| jd }|| jd }n|| \}}| jr|| j||   }	|| j||   }
nt||}	t||}
t|tst|| trt|	|
d||< q|	|
f||< q|S )Nr   r   rQ   )r7   rV   r-   rF   r   r   minmax)r>   Z	old_rangeZ	new_ranger`   rd   old_minold_maxnew_minnew_max	min_value	max_valuer   r   r   merge_range  s(    



zMinMaxCalibrater.merge_rangerq   c                    s  t jdkrjS fddtt jd D fddjD }i |D ](}| D ]\}}|g | q\qPjd   fddtdt  dD }fdd	D }g }tdt  dD ]}jrt	j
| |  dd
}	t	j
| |d   dd
}
n0t	j| |  dd
}	t	j| |d   dd
}
jrxt	jt	|	t	|
gdd
}|t| |g q|t|	|
g qttjtt||}jrj|_n|_jS )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
        r   c                    s   g | ]} j  | jqS r   r   get_outputsr   r   rE   r   r   
<listcomp>  s     z1MinMaxCalibrater.compute_data.<locals>.<listcomp>c                    s   g | ]}t t |qS r   r~   zipr   r   output_namesr   r   r     s    Nc                    s   g | ]} |  d d qS )r   r   )
rpartitionr   )added_output_namesr   r   r     s    rP   c                    s    i | ]}|j kr| | qS r   )r   r   )merged_output_dictr>   r   r   r     s    
  z1MinMaxCalibrater.compute_data.<locals>.<dictcomp>r%   r   )r\   r   r   r   r7   
setdefaultr   r   r   r   Zmeanr   r   r   absrY   rM   rZ   r[   r~   r   r   )r>   output_dicts_listdr@   rA   Zcalibrate_tensor_namesZmerged_added_output_dictpairsr   Zmin_value_arrayZmax_value_arrayZmax_absolute_valueZnew_calibrate_tensors_ranger   )r   r   r   r>   r   r     sB     

zMinMaxCalibrater.compute_data)Nr   FFFr   NF)rH   rI   rJ   r   rW   r   r   r   rB   r   r   rj   r   r   rM   r   __classcell__r   r   r   r   r     s"           

)I!r   c                	       sb   e Zd Zdeeef eee  d	 fd
dZdd Z	dd Z
edddZedddZ  ZS )HistogramCalibraterNr   F
percentile      -X@samer   c                    sv   t  j|||||d g | _d| _t| jjj| _dd | jjjD | _	d| _
|| _|| _|| _|	| _d| _|
| _dS )a=  
        :param model_path: ONNX model to calibrate. It is a model path.
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        :param scenario: see :class:`DistributionCalibrater`
        )r   r   r   r   Nc                 S   s   h | ]
}|j qS r   r   r   r   r   r   r     s     z/HistogramCalibrater.__init__.<locals>.<setcomp>)r   rB   r   r   r\   r   r   r   r   r   	collectormethodnum_binsnum_quantized_binsr  r   scenario)r>   r   r   r   r   r  r   r  r	  r  r
  r   r   r   rB     s$    zHistogramCalibrater.__init__c                 C   sV   |  | j\| _}| jD ]"}|| jkr| jjj||  qtj| j| j	| j
d dS )z
        make all quantization_candidates op type nodes as part of the graph output.
        :return: augmented ONNX model
        r   N)r   r   r   r   r   r   r   r   r   r   r   )r>   r   r   r   r   r   r   !  s    

z!HistogramCalibrater.augment_graphc                 C   s
   g | _ d S r]   r   rE   r   r   r   r   1  s    z(HistogramCalibrater.clear_collected_datar   c                    s   |  }|sq&jjd| q tjdkr<tdfddttjd D fddjD }i  |D ](}| D ]\}} 	|g | qqx fdd D }j
stjjjjjjd	_
j
|   dS )
zy
        Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
        Nr   r   c                    s   g | ]} j  | jqS r   r   r   rE   r   r   r   A  s     z4HistogramCalibrater.collect_data.<locals>.<listcomp>c                    s   g | ]}t t |qS r   r   r   r   r   r   r   B  s    c                    s    i | ]}|j kr| | qS r   )r   r   )merged_dictr>   r   r   r   K  s     
  z4HistogramCalibrater.collect_data.<locals>.<dictcomp>)r  r   r  r	  r  r
  )rk   r   r   r   r   r\   r9   r   r7   r   r  HistogramCollectorr  r   r  r	  r  r
  collectr   )r>   r   r   r   r   r@   rA   Zclean_merged_dictr   )r  r   r>   r   r   4  s4     
z HistogramCalibrater.collect_datarq   c                 C   sh   | j stdt| tr tj}n8t| tr2tj}n&t| trDtj	}nt
dt|  dt|| j  S )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {tensor name: (min value, max value)}
        z9No collector created and can't generate calibration data.zUnknown calibrater z". This method must be overwritten.)r  r9   rV   EntropyCalibraterrZ   rg   PercentileCalibraterrh   DistributionCalibraterri   rX   r<   rM   compute_collection_result)r>   calr   r   r   r   Z  s    


z HistogramCalibrater.compute_data)	Nr   Fr  Fr  r  r  r  )rH   rI   rJ   r   rW   r   r   r   rB   r   r   rj   r   rM   r   r   r   r   r   r   r     s"            

,&r   c                       s6   e Zd Zd	eeef eee  d fddZ  Z	S )
r  Nr   Fr,   r  r   c	           	   
      s    t  j||||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        )r  r   r  r	  Nr   rB   )	r>   r   r   r   r   r  r   r  r	  r   r   r   rB   n  s    zEntropyCalibrater.__init__)Nr   Fr,   Fr  r  
rH   rI   rJ   r   rW   r   r   r   rB   r   r   r   r   r   r  m  s          

r  c                       s6   e Zd Zd
eeef eee  d fdd	Z  Z	S )r  Nr   Fr  r  r  r   c	           	   
      s    t  j||||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        )r  r   r  r  Nr  )	r>   r   r   r   r   r  r   r  r  r   r   r   rB     s    zPercentileCalibrater.__init__)Nr   Fr  Fr  r  r  r   r   r   r   r    s          

r  c                       s6   e Zd Zd
eeef eee  d fdd	Z  Z	S )r  Nr   Fdistributionr  r  r   c              	      s   t  j|||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param scenario: for float 8 only, if `scenario="same"`,
            the algorithm weights and float 8 follow the same distribution,
            if `scenario="p3"`, it assumes the weights follow
            a gaussian law and float 8 ~ X^3 where X is a gaussian law
        )r  r  r
  Nr  )r>   r   r   r   r   r  r  r
  r   r   r   rB     s    zDistributionCalibrater.__init__)Nr   Fr  r  r  r  r   r   r   r   r    s         

r  c                   @   s,   e Zd ZdZejdd Zejdd ZdS )CalibrationDataCollectorzL
    Base class for collecting data for calibration-based quantization.
    c                 C   s   t dS )z
        Generate informative data based on given data.
            name_to_arr : dict
                tensor name to NDArray data
        Nrr   r>   name_to_arrr   r   r   r    s    z CalibrationDataCollector.collectc                 C   s   t dS )z?
        Get the optimal result among collection data.
        Nrr   rE   r   r   r   r    s    z2CalibrationDataCollector.compute_collection_resultN)rH   rI   rJ   __doc__r|   r}   r  r  r   r   r   r   r    s
   
r  c                   @   sv   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd ZedddZdd Zdd ZdS )r  a`  
    Collecting histogram for each tensor. Percentile and Entropy method are supported.

    ref: https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
                 pytorch_quantization/calib/histogram.html
    c                 C   s.   i | _ || _|| _|| _|| _|| _|| _d S r]   )histogram_dictr  r   r  r	  r  r
  )r>   r  r   r  r	  r  r
  r   r   r   rB     s    zHistogramCollector.__init__c                 C   s   | j S r]   )r  rE   r   r   r   get_histogram_dict  s    z%HistogramCollector.get_histogram_dictc                 C   sN   t d | jdkr| |S | jdkrB| jr6| |S | |S ntdd S )Nz/Collecting tensor data and making histogram ...>   r,   r  r  DOnly 'entropy', 'percentile' or 'distribution' methods are supported)printr  collect_valuer   collect_absolute_valuer9   r  r   r   r   r    s    



zHistogramCollector.collectc                 C   s  |  D ]p\}}t|tr|D ](}t|tjs tdt| d|q tdd |D }t|dks|td| d|t	|}n*t|tjst
dt| d|n|}| }|jdkrt|}t|}n tjd|jd}tjd|jd}t|}|| jkrZtj|| jd	\}	}
|
|j}
|jtjksFtd
|	|
||f| j|< q| j| }|d }|d }t|dstdt| t|dstdt| |d }|d }t|}||d kr|d |d  }t|d | || |}t||f}tj||d	\}	}
|
|j}
|	dt|  |7  < |jtjks\td
|	|
t||t||f| j|< qdS )z5
        Collect histogram on absolute value
        r6   z for tensor=c                 s   s   | ]}|j V  qd S r]   r   )r   ar   r   r   	<genexpr>  s     z<HistogramCollector.collect_absolute_value.<locals>.<genexpr>r   z6The calibration expects only one element type but got r   r   )r4   zMonly float32 or float16 is supported, every constant must be explicitly typedrP   rS   r   z'old_min should be a numpy array but is r   N)r7   rV   listr   ndarrayr&   r<   r   r\   r'   r9   flattensizer   r   r   r   absoluter  	histogramr  r(   float64r;   ZarangeZhstack)r>   r  r   data_arrZarrZdtypesZdata_arr_npr   r   r2   r3   old_histogramr   r   old_histold_hist_edgesZ	temp_amaxwidthZnew_bin_edgesr   r   r   r    s`    
&







z)HistogramCollector.collect_absolute_valuec           
      C   s   |  D ]\}}t|}| }|jdkrBt|}t|}n tjd|jd}tjd|jd}tjtt	|t	||jd}|| j
kr| j
| }| |||||| j
|< qtj|| j| |fd\}}	||	|||f| j
|< qdS )z1
        Collect histogram on real value
        r   r   r   N)r7   r   r'   r$  r%  r   r   r   r   r   r  merge_histogramr'  r  )
r>   r  r   r)  r   r   	thresholdr*  r2   r3   r   r   r   r  H  s2    




    z HistogramCollector.collect_valuec                 C   s  |\}}}}	}
||
krRt j|t||
 |
fd\}}|| |t||t|	||
fS |
dkrt j|t|| |fd\}}||7 }nrt|}d|
 | }t||
 | d }|d|  }|| |
 }t j||| |fd\}}||||   |7  < ||t||t|	||fS d S )Nr.  r   rP   r   )r   r'  r\   r   r   r   )r>   r*  r)  r   r   Znew_thresholdr+  r,  r   r   Zold_thresholdZnew_histr   r2   r3   Zold_num_binsZ
old_strideZhalf_increased_binsZnew_num_binsr   r   r   r/  h  s2    
z"HistogramCollector.merge_histogramc                 C   sp   | j rt| j dkrtdtd| jd | jdkr@|  S | jdkrR|  S | jdkrd|  S tdd S )	Nr   z=Histogram has not been collected. Please run collect() first.z0Finding optimal threshold for each tensor using z algorithm ...r,   r  r  r  )r  r\   r9   r  r  compute_entropycompute_percentilecompute_distributionrE   r   r   r   r    s    


z,HistogramCollector.compute_collection_resultc                 C   s  | j dk s| j dkrtd| j}| j }i }tdt|  td| j  tdd|  d| d	 | D ]X\}}|d }|d
 }| }t	|| }	| j
rt|	|d }
tj||
 |jd tj||
 |jdf||< nTd| d }t|	d| }
t|	|}tj|| |jdtj||
 |jdf||< |d }|d }|| d |k rp||| d
 f||< || d
 |kr|| d |f||< || |d d ||< tjdddkrpt|| qp|S )Nr   d   z<Invalid percentile. Must be in range 0 <= percentile <= 100.Number of tensors : Number of histogram bins : zPercentile : (g      Y@,)r   r   g      i@r#   rP   rS   QUANTIZATION_DEBUGr   1)r  r9   r  r  r\   r  r7   r*   r   Zcumsumr   Zsearchsortedr   r   osenvirongetr   )r>   r  r  thresholds_dictr   r'  r2   r3   totalZcdfZ	idx_rightZpercent_to_cut_one_sideZidx_leftr   r   r   r   r   r2    sD    
z%HistogramCollector.compute_percentilec                 C   s   | j }| j}i }tdt|  td| j d td| j  | D ]T\}}| ||}|||< ||d d ||< tj	dddkrLt
|d |d	  qL|S )
Nr5  r6  z: (The number may increase depends on the data it collects)zNumber of quantized bins : rP   r9  r   r:  r   )r  r	  r  r\   r  r7   get_entropy_thresholdr<  r=  r>  r   )r>   r  r	  r?  r   r'  optimal_thresholdr   r   r   r1    s    z"HistogramCollector.compute_entropyr   c                 C   s  |dkrt d| d|d d |dd   d }|dkr| |  |   }| |d   |   |d  d }tj||jdtj||jdfS t||krt|d dkr| ||   |   }| || | d   |   d }tj||jdtj||jdfS t|| }d|t|< d|t|< t|| | }| |  |   }| |d   |   |d  d }tj||jdtj||jdfS )	Nr   zpower=z <= 0 is invalid.r   r   g      ?rP   r   )	r9   r*   r   r   r   r   r   isnanisinf)r2   r3   powerrf   r.   r/   Zfactr   r   r   _avg_std  s$    $  $ $zHistogramCollector._avg_stdc           	   	   C   s:  | j dk rtd| j}i }tdt|  td| j   td| jd | D ]\}}|d }|d }|jtj	kst
| jd	kr| j||dd
\}}n(| jdkr| j||dd
\}}ntd|jtj	kst
|jtj	kst
|jtj	kst
t||||| | d||< tjdddkrXt|| qX|S )Ni   z3Invalid num_bins. Must be in range 512 <= num_bins.r5  r6  zScenario : r8  r   r   r  )rE  Zp3gUUUUUU?z,Invalid scenario. Must be in {'same', 'p3'}.)r.   r/   r2   r3   r0   r1   r9  r:  )r  r9   r  r  r\   r
  r7   r   r   r(  r&   rF  r-   r   r   r<  r=  r>  r   )	r>   r  r?  r   r'  r2   r3   Zavg_coefZstd_coefr   r   r   r3    s<    




z'HistogramCollector.compute_distributionc                    s  |d }|d }|j }|d }|d }|d j t|| d } fddt|j D }	t||d dD ]}
||
 }t||
 d |}|| || f|	|
| < t||| }| }t|d| }t||d }|d  |7  < |d  |7  < |dk	tj
}tj|tj
d}|j | }t|D ]*}|| }|| }t||| ||< q.|d  t||| d 7  < tj|j tj
d}t|D ]D}|| }|| }t||| }|dkr|| | |||< qt|}t|}|dks|dkrtjtj d}ntjt|| d}|||
| < qjt|}|	| }|d }|d	 }|d |k rn||d f}|d |kr|d |f}t|d d
stt|d d
st|S )aF  Given a dataset, find the optimal threshold for quantizing it.
        The reference distribution is `q`, and the candidate distribution is `p`.
        `q` is a truncated version of the original distribution.
        Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
        r   r   rP   c                    s(   g | ] }t jd  dt jd  dfqS )r   r   )r   r   r   r   r   r   r   %  s     z<HistogramCollector.get_entropy_threshold.<locals>.<listcomp>Nr   r   rS   r   )r%  r   r   Zzerosr   r   copydeepcopyr*   r(   r   r   r   r   r,   Zargminr;   r&   )r>   r'  r	  r2   r3   r  Zzero_bin_indexZnum_half_quantized_binZkl_divergenceZ
thresholdsr   rx   ry   Zsliced_distributionpZleft_outliers_countZright_outliers_countnonzerosZquantized_binsZnum_merged_binsindexstartendqZnormdivZmin_kl_divergence_idxrB  r   r   r   r   r   rA    sd    

 

z(HistogramCollector.get_entropy_thresholdN)r   )rH   rI   rJ   r  rB   r  r  r  r  r/  r  r2  r1  staticmethodrF  r3  rA  r   r   r   r   r    s   	:  .(r  r   F)r   r   c                 C   sV  d }|t jkrf|dd}|dd}|dd}	|dd }
|dd}t| ||||||	|
|d	}n|t jkr|d	d
}|dd
}|dd}t| ||||||d}n~|t jkr|d	d}|dd}|dd}t| ||||||d}n8|t jkr*|d	d}|dd}t	| |||||d}|rD|
  |  |S td| d S )Nr   Fr   r   r   r   r   )r   r   r   r   r   r   r  r  r	  )r   r   r  r	  r  r  r  T)r   r   r  r  r
  r  )r   r  r
  zUnsupported calibration method )rZ   r[   r>  r   rg   r  rh   r  ri   r  r   r   r9   )r   r   r   Zcalibrate_methodr   Zextra_optionsZ
calibratorr   r   r   r   r   r  r	  r  r
  r   r   r   create_calibratorr  sx    

	

	rQ  )Nr   )0r|   rG  r   r<  r   enumr   pathlibr   typingr   r   r   r   r   numpyr   r   r	   r
   r   r   r   Zquant_utilsr   r   r   r#  r    floatr   r,   r-   rM   rZ   ABCMetarj   r   r   r   r  r  r  r  r  r[   rW   rQ  r   r   r   r   <module>   s^     (n _y""#   

