U
    T?hA                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ e eZG dd dejjZ G dd dejjZ!G dd dZ"G dd dZ#dS )    N)Path)ListOptionalUnion)
TypeHelper)	OnnxModel)PastKeyValuesHelper)T5EncoderInputs)torch_onnx_export)	MT5ConfigT5Config)InferenceSessionc                       sZ   e Zd ZdZd	ejjejjeee	f e
e d fddZejejejdddZ  ZS )
T5DecoderInitz~A T5 decoder with LM head to create initial past key values.
    This model is only called once during starting decoding.
    N)decoderlm_headconfigdecoder_start_token_idc                    sP   t    || _|| _|| _|d k	r(|n| jj| _t| jdrF| jjnd| _d S Ntie_word_embeddingsT)super__init__r   r   r   r   hasattrr   )selfr   r   r   r   	__class__ _/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/t5/t5_decoder.pyr   !   s    
zT5DecoderInit.__init__)decoder_input_idsencoder_attention_maskencoder_hidden_statesc                 C   s   |d kr0|j d }tj|dftj|jd| j }| j|||ddd}|j}|j}| j	rf|| j
jd  }| |}t|\}	}
||	|
fS )Nr      dtypedeviceT)	input_idsr   r   	use_cachereturn_dict      )shapetorchZoneslongr#   r   r   last_hidden_statepast_key_valuesr   r   d_modelr   r   group_by_self_or_cross)r   r   r   r   
batch_sizedecoder_outputssequence_outputpresent_key_values	lm_logitsZ	past_selfZ
past_crossr   r   r   forward3   s0    
	
zT5DecoderInit.forward)N)__name__
__module____qualname____doc__r)   nnModuler   r   r   r   intr   ZTensorZFloatTensorr4   __classcell__r   r   r   r   r      s   	 
r   c                       s(   e Zd ZdZ fddZdd Z  ZS )	T5Decoderz-A T5 decoder with LM head and past key valuesc                    s:   t    || _|| _|| _t| jdr0| jjnd| _d S r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   Z   s    
zT5Decoder.__init__c                 G   sv   | j j}t||}|d}| j||||ddd}|j}|j}	| jrV|| j j	d  }| 
|}
t|	\}}|
|fS )N   T)r$   r,   r   r   r%   r&   r'   )r   num_decoder_layersr   Zgroup_by_layerZ	unsqueezer   r+   r,   r   r-   r   r.   )r   r   r   pastr?   r,   Zdummy_encoder_hidden_statesr0   r1   r2   r3   Zpresent_self_r   r   r   r4   c   s$    
	
zT5Decoder.forward)r5   r6   r7   r8   r   r4   r<   r   r   r   r   r=   W   s   	r=   c                
   @   sV   e Zd ZdddZedeeef eeee	j
eedddZedd	d
Zdd ZdS )T5DecoderInputsNc                 C   s   || _ || _|| _d S N)r   r   r,   )r   r   r   r,   r   r   r   r      s    zT5DecoderInputs.__init__F)r   r/   encode_sequence_lengthpast_decode_sequence_lengthr#   float16use_int32_inputsc                 C   s   | j }| j}| j}	| j}
d}tjd|	d ||f|r8tjntj|d}tj	|||	||d}|rbtj
ntj}|dkr||||
g}||||
g}g }td| D ]}|tj|||d qtd| D ]}|tj|||d qnd}t||j|S )aZ  Create dummy inputs for T5Decoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            T5DecoderInputs: dummy inputs for decoder
        r    r   )lowhighsizer"   r#   )rG   r>   r!   N)Z	num_headsr?   
vocab_sizeZd_kvr)   randintZint32Zint64r	   create_dummyrF   float32rangeappendZrandrB   Zattention_mask)r   r/   rD   rE   r#   rF   rG   Znum_attention_heads
num_layersrK   Z	head_sizeZsequence_lengthr   Zencoder_inputsZ
float_typeZself_attention_past_shapeZcross_attention_past_shaper@   rA   r   r   r   rM      sL    zT5DecoderInputs.create_dummy)returnc                 C   s"   | j | jg}| jr|| j |S rC   )r   r   r,   extend)r   
input_listr   r   r   to_list   s    zT5DecoderInputs.to_listc                 C   s2   | j rdd | j D nd }t| j | j |S )Nc                 S   s   g | ]}|j tjd qS ))r"   )tor)   rN   ).0pr   r   r   
<listcomp>   s     z+T5DecoderInputs.to_fp32.<locals>.<listcomp>)r,   rB   r   cloner   )r   r@   r   r   r   to_fp32   s    zT5DecoderInputs.to_fp32)N)FF)r5   r6   r7   r   staticmethodr   r   r   r;   r)   r#   boolrM   r   rU   r[   r   r   r   r   rB      s     

  
K	rB   c                	   @   sl   e Zd Zedeeef eje	e
e
e
dddZeedddZedeeef eeje
ed
ddZdS )T5DecoderHelperTF)r   r#   onnx_model_pathverboseuse_external_data_formatrG   c                 C   s  t | ttfsttj| jddt | tr,dnd||d}| }| jj}t	j
|dd}	t	j
|dd}
|
d	d|  }t | tr|	ng }t | tr|n|
}d|}dg}|d || ddiddddddddid}|D ]}dd|krdndd||< q|D ]H}d|kr"ddd||< n(t | tr>ddd||< nddi||< qt|jjddd t z}tj|d}t|jjddd t| t||r|n|d|||dd||d |rtj|dd}tj||ddd W 5 Q R X d	S )a  Export decoder to ONNX

        Args:
            decoder (Union[T5Decoder, T5DecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        r>         r   )r/   rD   rE   r#   rG   F)ZpresentTNlogitsr$   r   r/   rD   )r   r    )r$   r   r   rd   r   rE   )r   r>   crosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfZexport_paramsinput_namesoutput_namesdynamic_axesZopset_versionZdo_constant_foldingra   r`   )Zload_external_data)Zsave_as_external_dataZall_tensors_to_one_file)rd   )
isinstancer=   r   AssertionErrorrB   rM   r   rU   r?   r   get_past_namesrP   rS   r   parentmkdirtempfileTemporaryDirectoryospathjoinr
   tupleonnxZ
load_modelr   save)r   r#   r_   r`   ra   rG   inputsrT   r?   
past_namesZpresent_namesZpresent_self_namesZinput_past_namesZoutput_present_namesrl   rk   rm   nameZtmp_dir_nameZtemp_onnx_model_pathmodelr   r   r   export_onnx   s    

  
 
zT5DecoderHelper.export_onnx)r{   c                 C   s   t d t|j  t|j  d}|jrt|jd dksPt	t
t|jd }t|}t|jD ]"\}}t|  ||| < qv| d|}|S )zRun inference of ONNX model.zstart onnxruntime_inference)r$   r      r   N)loggerdebugnumpyZascontiguousarrayr   cpur   r,   lenro   r;   r   rp   	enumeraterun)ort_sessionr{   Z
ort_inputsrQ   r|   iZpast_tensorort_outputsr   r   r   onnxruntime_inference_  s    

z%T5DecoderHelper.onnxruntime_inferencer   )r~   r   r#   rG   	max_casesc                 C   s  t |ddk}ddddg}g }|d| D ]\}}	}
t| trFd}
tj| j||	|
|||d	}|  }t	
  | | }W 5 Q R X t||}| jj}tt|d   |d  }|}td
|  td| D ]P}tt|d |   |d|   }td| d|  t||}qt| trtd| D ]Z}tt|d |   |dd|  |   }td| d|  t||}qJ|| td||	|
| q,|S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.Zpast_key_self_0ztensor(float16))r      rb   )r    r>   rc   )rb   r    r    )   rc   r>   Nr   )r#   rF   rG   zlogits max_diff=r>   r    zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   Zget_input_typern   r   rB   rM   r   r[   rU   r)   Zno_gradr^   r   r?   r   Zamaxabsr   r   r   rO   maxrP   info)r~   r   r#   rG   r   rF   Z
test_casesZtest_cases_max_diffr/   rD   rE   r{   rT   Ztorch_outputsr   r?   Zmax_diffZmax_diff_allr   r   r   r   verify_onnxs  s^    	


$,,
zT5DecoderHelper.verify_onnxN)TFF)r   )r5   r6   r7   r\   r   r=   r   r)   r#   strr]   r   rB   r   r   r;   r   r   r   r   r   r^      s.      
u 
r^   )$loggingru   rs   pathlibr   typingr   r   r   r   ry   r)   Zio_binding_helperr   Z
onnx_modelr   Zpast_helperr   Z
t5_encoderr	   Ztorch_onnx_export_helperr
   Ztransformersr   r   Zonnxruntimer   	getLoggerr5   r   r9   r:   r   r=   rB   r^   r   r   r   r   <module>   s&   
;(i