U
    T?h
                     @   s`   d dl Z d dlZd dlZd dlZd dlmZ e eZdd Z	dd Z
dd ZG d	d
 d
ZdS )    N)Conv1Dc                 C   s<   | j j\}}tj||}| j jj |j _| jj|j_|S )N)	weightshapetorchnnLineardataT
contiguousZbias)moduleZin_sizeZout_sizelinear r   Z/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/quantize_helper.py_conv1d_to_linear   s
    r   c                 C   sN   t d t| jD ]4}| j| }t|tr@t|}|| j|< qt| qdS )zsin-place
    This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
    zreplace Conv1D with LinearN)loggerdebuglistZ_modules
isinstancer   r   conv1d_to_linear)modelnamer   r   r   r   r   r      s    


r   c                 C   s.   t |  d tjdd }td |S )Nztemp.p   )r   saveZ
state_dictospathgetsizeremove)r   sizer   r   r   _get_size_of_pytorch_model'   s    
r   c                   @   s,   e Zd ZeejfddZedddZdS )QuantizeHelperc                 C   sL   t |  tjj| tjjh|d}tdt|   tdt|  |S )z{
        Usage: model = quantize_model(model)

        TODO: mix of in-place and return, but results are different
        )dtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   Zquantizationquantize_dynamicr   r   r   infor   )r   r    Zquantized_modelr   r   r   quantize_torch_model/   s
    z#QuantizeHelper.quantize_torch_modelFc                 C   s   ddl m} ddlm} ||jjddd tdtj	
| d   || ||dtjjid	 td
|  tdtj	
|d   d S )Nr   )Path)r!   T)parentsexist_okz&Size of full precision ONNX model(MB):r   ZDefaultTensorType)use_external_data_formatZextra_optionszquantized model saved to:z!Size of quantized ONNX model(MB):)pathlibr$   Zonnxruntime.quantizationr!   parentmkdirr   r"   r   r   r   onnxZTensorProtoFLOAT)Zonnx_model_pathZquantized_model_pathr'   r$   r!   r   r   r   quantize_onnx_model<   s    
z"QuantizeHelper.quantize_onnx_modelN)F)__name__
__module____qualname__staticmethodr   Zqint8r#   r-   r   r   r   r   r   .   s   r   )loggingr   r+   r   Ztransformers.modeling_utilsr   	getLoggerr.   r   r   r   r   r   r   r   r   r   <module>   s   
