U
    T?hø8  ã                   @   s€   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dlm	Z	m
Z
 d dlmZmZ e  e¡Zdd„ Zdd	d
„Zdd„ ZdS )é    N)Ú	Precision)Úget_shared_initializersÚ.update_decoder_subgraph_output_cross_attentionÚ?update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha)ÚTensorProtoÚhelper)ÚWhisperConfigÚWhisperTokenizerc                 C   sL   t tdd„ | ƒƒ}t|ƒt|ƒks&t‚t||ƒD ]\}}|j|ks0t‚q0d S )Nc                 S   s   | S )N© )Ú
beam_inputr
   r
   úg/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/whisper/whisper_chain.pyÚ<lambda>   ó    zverify_inputs.<locals>.<lambda>)ÚlistÚfilterÚlenÚAssertionErrorÚzipÚname)Úbeam_inputsÚgraph_inputsZbeam_required_inputsZgraph_inputr   r
   r
   r   Úverify_inputs   s    r   Tc                 C   s@   |rt tdd„ | ƒƒS t| ƒdkr<| d dkr<|  ¡  qq<q| S )Nc                 S   s   | dkS )NÚ r
   )Úelmr
   r
   r   r   #   r   zclean_list.<locals>.<lambda>r   éÿÿÿÿr   )r   r   r   Úpop)ZarrÚremove_all_stringsr
   r
   r   Ú
clean_list    s    
r   c           2      C   s  t j| jdd}d|j_t j| jdd}d|j_tj| j| j	d}t
j| j| j	d}| jtjkrddnd}| jtjkrxdnd	d
ddd| jtjkr’dnd| jtjkr¤dnd| jr°dnd| jr¼dndd| jrÊdnd| jrÖdnd| jrâdnd| jrîdnd| jrú|ndg}| jtjkrdnd}| jtjkr(dnd}d| jr:|nd| jrH|nd| jrVdnd| jrddndg}	g }
| jtjkr^tjd d	gdgd!tjd"}tjd dgdgd#tjd"}tjd dgdgd$tjd"}|
 |||g¡ | jrtjd dgdgd%tjd"}|
 |¡ | jr2tjd dgdgd&tjd"}|
 |¡ | jr^tjd dgdgd'tjd"}|
 |¡ t d(|j¡t d)|j ¡t d*|j!¡t d+| "d,g¡d- ¡t d.| "d/g¡d- ¡t d0| "d1g¡d- ¡| jrät d2| "d3g¡d- ¡ndt d4| "d5g¡d- ¡t d6| "d7g¡d- ¡t d8| j#¡t d9d¡t d:d;¡| jrFt d<d=¡ndg}tjd>t$|d?d@t$|	d?d@dAdBdC}|j% t$|dd@¡ t &d	tjdDdEdFg¡}t &d
tj'd=g¡}t &dtj'd=g¡}t &dtj'd=g¡}t &dtj'd=g¡}t &dtjd=g¡}t &dtjd=g¡}t &dtj'|j(g¡}t &dtj'dD|j(g¡}t &dtj'dDdGg¡}t &dtj'd=g¡}t &dtj'dHd;g¡}t &dtj'dDdIg¡}t &dtjd=g¡} t$|||||||| jr¬|nd| jrº|nd| jrÈ|nd| jrÖ|nd| jrä|nd| jrò|nd| jr | ndgƒ}!t &dtj'dDdd
g¡}"t &dtjdDg¡}#t &dtjdDg¡}$t &dtjdDddJd
dKg¡}%t &dLtjdDg¡}&t$|"| jr~|#nd| jrŒ|$nd| j)s¦| j*sª| jrª|%nd| jr¸|&ndgƒ}'t+| dMƒr| j,rt-|jƒrìt. /dN¡ n
t. 0dO¡ t+| dPƒr| jrt1|jƒ t2||ƒ}(|j% t dQ|j¡t dR|j¡g¡ tj3dBd=dStj3ddTdSg})|
 |¡ | jr”tjd dgdLgdUtjd"}*|
 |*¡ tj4|
dV|!|'|(dW}+dXdY„ |!D ƒ},dZdY„ |'D ƒ}-| j*rLt j| j*dd}.|.j}/|+j5 |/j5¡ |+j6 |/j6¡ |/j7D ]8}0|0j|,kr|0j|-kr|0jdkr|+j7 |0g¡ q|+j8 |/j8¡ t9||!ƒ |j:|j:ksht;‚t. /d[|j:› d\¡ tj<|+d]|)|j:d^}1t=j> ?| j@¡rÚt. /d_| j@› d`| j@da › ¡ t= A| j@¡ t= A| j@da ¡ t jB|1| j@dddt=j> C| j@¡› dadb t jDjE| j@ddc d S )dNT)Zload_external_datazencoderdecoderinit subgraphzdecoder subgraph)Ú	cache_dirZtemperature_fp16ÚtemperatureZinput_features_fp16Úinput_featuresÚ
max_lengthÚ
min_lengthÚ	num_beamsÚnum_return_sequencesZlength_penalty_fp16Úlength_penaltyZrepetition_penalty_fp16Úrepetition_penaltyÚ
vocab_maskr   Úprefix_vocab_maskÚdecoder_input_idsÚlogits_processorÚcross_qk_layer_headÚextra_decoding_idsZsequence_scores_fp16Úsequence_scoresZscores_fp16ÚscoresÚ	sequencesÚcross_qkZno_speech_probs_beamZCastZCastInputFeaturesToFp16)ÚinputsÚoutputsr   ÚtoZCastLengthPenaltyToFp16ZCastRepetitionPenaltyToFp16Ztemperature_to_fp16ZCastOutputSequenceScoresToFp32ZCastScoresToFp32Úeos_token_idÚpad_token_idÚdecoder_start_token_idZtranslate_token_idz<|translate|>r   Ztranscribe_token_idz<|transcribe|>Zstart_of_lm_token_idz<|startoflm|>Zno_speech_token_idz<|nospeech|>Zno_timestamps_token_idz<|notimestamps|>Zbeginning_timestamp_token_idz<|0.00|>Úno_repeat_ngram_sizeZearly_stoppingZ
model_typeé   Zdecoder_output_cross_qké   ZWhisperBeamSearchF)r   Z
BeamSearchzcom.microsoft)r1   r2   r   ÚdomainZ
batch_sizeZfeature_sizeZsequence_lengthZinitial_sequence_lengthZnum_layer_headZextra_decoding_ids_lenZnum_layer_head_cross_qkÚframesÚno_speech_probsÚuse_gpuzUUpdated whisper decoder subgraph to use DecoderMaskedMultiHeadAttention successfully!zPDecoderMaskedMultiHeadAttention could not be applied to whisper decoder subgraphÚcollect_cross_qkÚdecoderÚencoder)r:   Úversioné   Zno_speech_probs_cast_to_fp32zWhisperBeamSearch Graph)r   r1   r2   Úinitializerc                 S   s   g | ]
}|j ‘qS r
   ©r   )Ú.0Úgir
   r
   r   Ú
<listcomp>  s     zchain_model.<locals>.<listcomp>c                 S   s   g | ]
}|j ‘qS r
   rD   )rE   Úgor
   r
   r   rG     s     zUsing IR version z for chained modelzonnxruntime.transformers)Zproducer_nameZopset_importsÚ
ir_versionzOverwriting z and z.data)Zsave_as_external_dataZall_tensors_to_one_fileZconvert_attributeÚlocation)Z
full_check)FÚonnxZ
load_modelZencoder_pathÚgraphr   Zdecoder_pathr   Zfrom_pretrainedZmodel_name_or_pathr   r	   Ú	precisionr   ZFLOAT16Zuse_vocab_maskZuse_prefix_vocab_maskZuse_forced_decoder_idsZuse_logits_processorr>   r,   Zuse_temperatureZoutput_sequence_scoresZoutput_scoresZoutput_no_speech_probsr   Z	make_noder   ÚextendÚappendÚFLOATZmake_attributer4   r5   r6   Zconvert_tokens_to_idsr7   r   Ú	attributeZmake_tensor_value_infoZINT32Z
vocab_sizeZoutput_cross_qkZcross_qk_onnx_modelÚhasattrr=   r   ÚloggerÚinfoÚwarningr   r   Zmake_opsetidZ
make_graphrC   ÚnodeÚinputÚoutputr   rI   r   Zmake_model_gen_versionÚosÚpathÚisfileZbeam_model_output_dirÚremoveÚsaveÚbasenameÚcheckerZcheck_model)2ÚargsZencoder_modelZdecoder_modelÚconfigÚ	tokenizerZtemperature_namer   Zsequence_scores_nameZscores_nameZbeam_outputsZgraph_nodesZinput_features_cast_nodeZlen_pen_cast_nodeZrep_pen_cast_nodeZtemp_cast_nodeZ output_sequence_scores_cast_nodeZoutput_scores_cast_nodeZbeam_search_attrsrV   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r   r   r/   r-   r.   r0   r<   Zgraph_outputsZinitializersZopset_importZprob_cast_nodeZ
beam_graphZbeam_graph_input_namesZbeam_graph_output_namesZpost_qk_modelZpost_qk_graphZpgiZ
beam_modelr
   r
   r   Úchain_model.   sê   ñûûûûû
û
û
 ÿÿ

í

û  ÿ  ÿ  ÿ  ÿòÿ  ÿýûÿ


þÿ
û
û
ÿþý
üúrc   )T)ÚloggingrY   rK   Zbenchmark_helperr   Zconvert_generationr   r   r   r   r   Ztransformersr   r	   Ú	getLoggerÚ__name__rS   r   r   rc   r
   r
   r
   r   Ú<module>   s   
	
