U
    T?h*                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ d dlmZmZ d d	lmZmZ d d
lmZmZ d dlmZ e eZ dddddgZ!dddddgZ"G dd dZ#dS )    N)Path)DictListUnion)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelperT5DecoderInit)	T5EncoderT5EncoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSessionzt5-smallzt5-basezt5-largezt5-3bzt5-11bzgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                   @   s   e Zd ZedeeeeedddZedeeejeeee	eej
jf dd	d
Zedeeeeef ejeeeeedddZeddddgfeee dddZedeeeeeeeedddZeeeeeef eejedddZdS ) T5Helper F)
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 C   s^   |}t j|r t|jd }n|dd  ||7 }|rHt j| |n| }t j||d S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   Z
model_name	directory r#   ^/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_path   s    zT5Helper.get_onnx_pathTt5)r   	cache_dirdevicemerge_encoder_and_decoder_init
model_typestate_dict_pathr   c                 C   s   |dkrt j| |d}n |dkr0tj| |d}ntd|rL|t| t|j|j	|j
}| | |rt|j|j|j	|j
dd}||dS t|j|j
}	|	 | t|j|j	|j
}
|
 | |	||
dS dS )	ab  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            merge_encoder_and_decoder_init (bool, optional): Whether merge encoder and decoder initialization into one ONNX model. Defaults to True.
            is_mt5 (bool, optional): whether the model is MT5 instead of T5
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        r&   )r'   Zmt5z only support mode_type=t5 or mt5N)Zdecoder_start_token_id)encoder_decoder_initdecoder)encoderr-   decoder_init)r   Zfrom_pretrainedr   
ValueErrorZload_state_dicttorchloadr	   r-   Zlm_headconfigevaltor   r.   r   r   )r   r'   r(   r)   r*   r+   modelr-   r,   r.   r/   r#   r#   r$   
load_model;   s4    
zT5Helper.load_modelr6   r(   onnx_model_pathverboseuse_external_data_formatZuse_decoder_input_idsuse_int32_inputsc              	   C   sZ   t | tr t| ||||| n6t | trBt| |||||| nt| ||||| d S )N)
isinstancer   r   export_onnxr   r   r
   r8   r#   r#   r$   r>   p   s6    



zT5Helper.export_onnxZSimplifiedLayerNormalizationZ SkipSimplifiedLayerNormalizationZReluAdd)
onnx_modelop_block_listc                 C   sT  dd |   D }t|}||}td| d|  |  jd j}d}|  }||ksbt	|| }d}	|j
dkr|}	td	|j  d}
|jD ]}| |}
|
dk	r qqt|
}td
|j d|  |dk }ntd|j
 d|j  g }g }|s|	dk	r|g}|	jg}||||d}td|  | jf ddi| |S )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): . Defaults to ["SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", "Add"]
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        c                 S   s   h | ]
}|j qS r#   )op_type).0noder#   r#   r$   	<setcomp>   s     z0T5Helper.auto_mixed_precision.<locals>.<setcomp>z	fp32 op: z
 fp16 op: r   FNZMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node )keep_io_typesrA   node_block_listZforce_fp16_initializersz!auto_mixed_precision parameters: Zuse_symbolic_shape_inferT)Znodesset
differenceloggerinfographoutputnameoutput_name_to_nodeAssertionErrorrB   inputZget_initializerr   debugwarningZconvert_float_to_float16)r@   rA   Zop_full_setZfp32_op_setZfp16_op_setZlogits_output_nameZis_weight_fp16_precisionrO   rD   Zlast_matmul_nodeZinitializerrQ   Zmax_diffrF   rG   
parametersr#   r#   r$   auto_mixed_precision   sF    




zT5Helper.auto_mixed_precision)r9   optimized_model_path
is_float16num_attention_headshidden_sizer;   rU   use_gpuc              
   C   sx   ddl m} d}	|r"|d}	d|	_t| d|||s4dnd|	d| d}
|rd|rXt|
 n|
jdd |
j||d	d
 dS )zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr&   F   )r*   Z	num_headsrY   	opt_leveloptimization_optionsrZ   Zonly_onnxruntime)Zcast_input_outputT)Zall_tensors_to_one_file)Zfusion_optionsr[   Zenable_skip_layer_normr   r   rU   Z convert_model_float32_to_float16Zsave_model_to_file)r9   rV   rW   rX   rY   r;   rU   rZ   r[   r^   mr#   r#   r$   optimize_onnx   s(    
zT5Helper.optimize_onnxr6   Zort_sessionr(   r<   c                 C   sD   t | trt| |||S t | tr4t| |||S t| |||S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)r=   r   r   verify_onnxr   r   r
   ra   r#   r#   r$   rb     s
    

zT5Helper.verify_onnxN)r   F)Tr&   r   )TFTF)FTF)__name__
__module____qualname__staticmethodstrboolr%   r1   r(   r   nnModuler7   r   r   r	   r   r   r>   r   r   rU   intr`   r   rb   r#   r#   r#   r$   r      s        4    &C   &r   )$loggingr   pathlibr   typingr   r   r   r1   Zfloat16r   r@   r   Z	optimizerr   Z
t5_decoderr	   r
   r   Z
t5_encoderr   r   Zt5_encoder_decoder_initr   r   Ztransformersr   r   Zonnxruntimer   	getLoggerrc   rJ   ZPRETRAINED_T5_MODELSZPRETRAINED_MT5_MODELSr   r#   r#   r#   r$   <module>   s    
