U
    T?h=                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ e eZG dd dejjZ G dd dejjZ!G dd dZ"G dd dZ#dS )    N)Path)ListOptionalUnion)
TypeHelper)PastKeyValuesHelper)	OnnxModel)torch_onnx_export)WhisperConfig
file_utils)WhisperDecoderInitOpenai)InferenceSessionc                       sH   e Zd ZdZd	ejjeee	 d fddZ
ejejdddZ  ZS )
WhisperDecoderInitzvA Whisper decoder to create initial past key values.
    This model is only called once during starting decoding.
    N)decoderconfigdecoder_start_token_idc                    s0   t    || _|| _|d k	r"|n| jj| _d S N)super__init__r   r   r   )selfr   r   r   	__class__ i/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/whisper/whisper_decoder.pyr   !   s
    
zWhisperDecoderInit.__init__)decoder_input_idsencoder_hidden_statesc                 C   sV   t  }||d< d |d< d |d< | jjd ||d ddd}| j|d }||j|jfS )Nlast_hidden_statehidden_states
attentionsTencoder_outputsr   past_key_valuesZ	use_cacheZreturn_dictr   )r   ModelOutputr   modelZproj_outr!   Zencoder_last_hidden_state)r   r   r   r    outlogitsr   r   r   forward.   s    zWhisperDecoderInit.forward)N)__name__
__module____qualname____doc__torchnnModuler
   r   intr   ZTensorZFloatTensorr&   __classcell__r   r   r   r   r      s    r   c                       s6   e Zd ZdZd	eejjd fddZdd Z	  Z
S )
WhisperDecoderz&A Whisper decoder with past key valueshfN)
model_implr#   c                    s4   t    || _|| _|| _|d k	r0t||| _d S r   )r   r   r   r   r2   r   whisper_decoder_openai_init)r   r   r   r2   r#   r   r   r   r   G   s    
zWhisperDecoder.__init__c                 G   s   t  }t|jd dt| jjf}||d< ||d< d |d< | jdkrn|	d | j
|||d\}}||fS t|dkrd }n
t|}| jd |||ddd	}|d }	t|j\}
}|	|
fS )
Nr     r   r   r   Zopenai)pastTr   )r   r"   r+   Zrandnshaper.   r   d_modelr2   Z	unsqueezer3   lenr   Zback_group_by_layerr   Zgroup_by_self_and_crossr!   )r   r   r5   r    Zdummy_encoder_hidden_statesZdec_outpresentr!   Zdecoder_outr%   Zpresent_self_r   r   r   r&   O   s6    

  

zWhisperDecoder.forward)r1   N)r'   r(   r)   r*   strr+   r,   r-   r   r&   r/   r   r   r   r   r0   D   s   r0   c                   @   sP   e Zd ZdddZedeeeeeje	e	e
dddZed	d
dZdd ZdS )WhisperDecoderInputsNc                 C   s   || _ || _d S r   )r   r!   )r   r   r!   r   r   r   r   p   s    zWhisperDecoderInputs.__init__Fr1   )r   
batch_sizeencode_sequence_lengthpast_decode_sequence_lengthdevicefloat16use_int32_inputsr2   c                 C   s   | j }| j}	| j}
| j| j  }d}tjd|
d ||f|r>tjntj|d}|rTtjntj	}|dkr||||g}|||dkr~|n||g}g }t
d|	 D ]}|tj|||d qt
d|	 D ]}|tj|||d qnd}t||S )ad  Create dummy inputs for WhisperDecoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            WhisperDecoderInputs: dummy inputs for decoder
           r   )lowhighsizedtyper@   r1      )rG   r@   N)Zencoder_attention_headsdecoder_layers
vocab_sizer7   r+   randintZint32Zint64rA   float32rangeappendZrandr<   )r   r=   r>   r?   r@   rA   rB   r2   Znum_attention_heads
num_layersrJ   Z	head_sizesequence_lengthr   Z
float_typeZself_attention_past_shapeZcross_attention_past_shaper5   r:   r   r   r   create_dummyx   s>    z!WhisperDecoderInputs.create_dummy)returnc                 C   s   | j g}| jr|| j |S r   )r   r!   extend)r   
input_listr   r   r   to_list   s    zWhisperDecoderInputs.to_listc                 C   s*   | j rdd | j D nd }t| j |S )Nc                 S   s   g | ]}|j tjd qS ))rG   )tor+   rL   ).0pr   r   r   
<listcomp>   s     z0WhisperDecoderInputs.to_fp32.<locals>.<listcomp>)r!   r<   r   clone)r   r5   r   r   r   to_fp32   s
    zWhisperDecoderInputs.to_fp32)N)FFr1   )r'   r(   r)   r   staticmethodr
   r.   r+   r@   boolr;   rQ   r   rU   r[   r   r   r   r   r<   o   s$    
   Dr<   c                	   @   sd   e Zd ZedeejeeeedddZ	ee
dddZedeeef eejeed
ddZdS )WhisperDecoderHelperTF)r   r@   onnx_model_pathverboseuse_external_data_formatrB   c                 C   s  t | ttfsttj| jddt | tr,dnd||| jd}| }t	j
| jjdd}t	j
| jjdd}	|	d	d| jj  }
t | tr|ng }t | tr|
n|	}d|}dg}|| ddiddddddd}|D ]}dd|krdndd||< q|D ]F}d|krddd||< qt | tr6ddd||< qddi||< qt|jjddd t z}tj|d}t|jjddd t| t||r|n|d|||dd||d |rtj|dd}tj||ddd W 5 Q R X d	S )a  Export decoder to ONNX

        Args:
            decoder (Union[WhisperDecoder, WhisperDecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        rH   r4      r   )r=   r>   r?   r@   rB   r2   F)r9   TNr%   	input_idsr=   zencode_sequence_length / 2)r   rC   rP   )rc   r   r%   r   r?   r>   )r   rH   crosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfZexport_paramsinput_namesoutput_namesdynamic_axesZopset_versionZdo_constant_foldingra   r`   )Zload_external_data)Zsave_as_external_dataZall_tensors_to_one_file)r%   )
isinstancer0   r   AssertionErrorr<   rQ   r   r2   rU   r   get_past_namesrI   rS   r   parentmkdirtempfileTemporaryDirectoryospathjoinr	   tupleonnxZ
load_modelr   save)r   r@   r_   r`   ra   rB   inputsrT   
past_namesZpresent_namesZpresent_self_namesZinput_past_namesZoutput_present_namesrk   rj   rl   nameZtmp_dir_nameZtemp_onnx_model_pathr#   r   r   r   export_onnx   s|    	

 

z WhisperDecoderHelper.export_onnx)rz   c                 C   s   t d dt|j  i}|jrt|jd dks>tt	t|jd }t
|}t|jD ]"\}}t|  ||| < qd| d|}|S )zRun inference of ONNX model.zstart onnxruntime_inferencerc      r   N)loggerdebugnumpyZascontiguousarrayr   cpur!   r8   rn   r.   r   ro   	enumeraterun)ort_sessionrz   Z
ort_inputsrO   r{   iZpast_tensorort_outputsr   r   r   onnxruntime_inference;  s    
 
z*WhisperDecoderHelper.onnxruntime_inferencer~   )r#   r   r@   rB   	max_casesc                 C   s  t |ddk}ddddg}g }|d| D ]\}}	}
t| trHd}n|
}tj| j||	||||d	}|  }t	
  | | }W 5 Q R X t||}tt|d   |d  }|}td
|  td| jj D ]P}tt|d |   |d|   }td| d|  t||}qt| trtd| jj D ]^}tt|d |   |dd| jj  |   }td| d|  t||}qP|| td||	|
| q,|S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.Zpast_key_self_0ztensor(float16))r~         )rC   rH      )r   rC   rC   )   r   rH   Nr   )r@   rA   rB   zlogits max_diff=rH   rC   zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   Zget_input_typerm   r   r<   rQ   r   r[   rU   r+   Zno_gradr^   r   r   Zamaxabsr   r   r   rM   rO   maxrN   info)r#   r   r@   rB   r   rA   Z
test_casesZtest_cases_max_diffr=   r>   r?   Zdec_seq_lenrz   rT   Ztorch_outputsr   Zmax_diffZmax_diff_allr   r   r   r   verify_onnxN  s^    	


$,0
z WhisperDecoderHelper.verify_onnxN)TFF)r~   )r'   r(   r)   r\   r0   r+   r@   r;   r]   r}   r<   r   r   r   r   r.   r   r   r   r   r   r^      s.      n 
r^   )$loggingrt   rr   pathlibr   typingr   r   r   r   rx   r+   Zio_binding_helperr   Zmodels.t5.past_helperr   Z
onnx_modelr   Ztorch_onnx_export_helperr	   Ztransformersr
   r   Zwhisper_openai_helperr   Zonnxruntimer   	getLoggerr'   r   r,   r-   r   r0   r<   r^   r   r   r   r   <module>   s&   
(+\