U
    T?hY                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZ d dlmZmZ d dl m!Z!m"Z" d dl#m$Z$ e %e&Z'dddddddddddgZ(G dd dZ)dS )    N)Path)DictTupleUnion)float_to_float16_max_diff)	OnnxModel)optimize_model)version)WhisperConfigWhisperForConditionalGenerationWhisperProcessor)__version__)WhisperDecoderWhisperDecoderHelperWhisperDecoderInit)WhisperEncoderWhisperEncoderHelper)WhisperEncoderDecoderInitWhisperEncoderDecoderInitHelper)InferenceSessionzwhisper-tinyzwhisper-tiny.enzwhisper-basezwhisper-base.enzwhisper-smallzwhisper-small.enzwhisper-mediumzwhisper-medium.enzwhisper-largezwhisper-large-v2zwhisper-large-v3c                   @   s6  e Zd Zed#eeeeedddZeeeejej	j
dddZed$eeeejeeeeej	j
f d
ddZed%eeeeef ejeeeeedddZed&eee dddZed'eeeeeeeeed	ddZed(eej	j
ejeedddZeeedddZed)eeeejeedd d!Zd"S )*WhisperHelper F)
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 C   s^   |}t j|r t|jd }n|dd }||7 }|rHt j| |n| }t j||d S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directory r'   h/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/whisper/whisper_helper.pyget_onnx_path,   s    zWhisperHelper.get_onnx_path)r   	cache_dirdevicer   c              	   C   s   ddl m}m}m} ddlm}m} d}| dd dd }	d	\}
}|	|krf|||	 ||}
||	 }t|
d
}t	j
||d}W 5 Q R X ~
|f |d }||}||d  |dk	r|| ||S )  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            merge_encoder_and_decoder_init (bool, optional): Whether merge encoder and decoder initialization into one ONNX model. Defaults to True.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        r   )_ALIGNMENT_HEADS_MODELS	_download)ModelDimensionsWhisperFr   r      N)NNrb)Zmap_locationdimsZmodel_state_dict)Zwhisperr-   r.   r/   Zwhisper.modelr0   r1   r#   opentorchloadload_state_dictZset_alignment_headsto)r   r*   r+   r-   r.   r/   r0   r1   Z	in_memoryr%   Zcheckpoint_fileZalignment_headsfp
checkpointr4   modelr'   r'   r(   load_model_openaiI   s"    
zWhisperHelper.load_model_openaiT)r   
model_implr*   r+   merge_encoder_and_decoder_initstate_dict_pathr   c                 C   s  i }t tt dkr d|d< tj| fd|i|}|dkr`t| ||}|j|j }	}
|}n|| }	}
d}|r|j	t
|dd t|
|j||d	}| | |rt|	|
|jd||d
}||dS t|jj|j}| | t|j|j}| | |||dS dS )r,   4.36.0eagerattn_implementationr*   ZopenaiNF)strict)r>   r<   )decoder_start_token_idr>   r<   )encoder_decoder_initdecoder)encoderrG   decoder_init)r	   parsetransformers_versionr   from_pretrainedr   r=   rH   rG   r8   r6   r7   r   configevalr9   r   r   r<   r   )r   r>   r*   r+   r?   r@   extra_kwargsr<   Zopenai_modelZmodel_encoderZmodel_decoderZpassed_modelrG   rF   rH   rI   r'   r'   r(   
load_modelp   s@    

zWhisperHelper.load_modelr<   r+   onnx_model_pathverboseuse_external_data_formatZuse_decoder_input_idsZuse_int32_inputsc              	   C   sX   t | trt| |||| n6t | tr@t| |||||| nt| ||||| d S N)
isinstancer   r   export_onnxr   r   r   rQ   r'   r'   r(   rW      s4    



zWhisperHelper.export_onnxZSimplifiedLayerNormalizationZ SkipSimplifiedLayerNormalizationZReluAdd)
onnx_modelop_block_listc                 C   s\  t dd |  D }t |}||}td| d|  |  jd j}d}|  }||ksft	|| }d}	|j
dkr|}	td	|j  d}
|jD ]}| |}
|
dk	r qqt|
}td
|j d|  |dk }ntd|j
 d|j  g }g }|s"|	dk	r"|g}|	jg}|t|||d}td|  | jf ddi| |S )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): . Defaults to ["SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", "Add"]
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        c                 S   s   g | ]
}|j qS r'   )op_type).0noder'   r'   r(   
<listcomp>   s     z6WhisperHelper.auto_mixed_precision.<locals>.<listcomp>z	fp32 op: z
 fp16 op: r   FNZMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node )keep_io_typesr[   node_block_listZforce_fp16_initializersz!auto_mixed_precision parameters: Zuse_symbolic_shape_inferT)setZnodes
differenceloggerinfographoutputnameoutput_name_to_nodeAssertionErrorr\   inputZget_initializerr   debugwarninglistZconvert_float_to_float16)rZ   r[   Zop_full_setZfp32_op_setZfp16_op_setZlogits_output_nameZis_weight_fp16_precisionri   r^   Zlast_matmul_nodeZinitializerrk   max_diffr`   ra   
parametersr'   r'   r(   auto_mixed_precision   sF    




z"WhisperHelper.auto_mixed_precisioncpu)	rR   optimized_model_path
is_float16num_attention_headshidden_sizerT   rq   use_gpuproviderc	              
   C   sx   ddl m}	 |	d}
d|
_|dk|
_t| d|||s6dnd|
|dd	}|rd|rXt| n|jdd
 |j||dd dS )zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsZbartTZrocm   NF)Z
model_typeZ	num_headsrv   	opt_leveloptimization_optionsrw   Zonly_onnxruntime)Zcast_input_output)Zall_tensors_to_one_file)	Zfusion_optionsry   Zuse_multi_head_attentionZ!disable_multi_head_attention_biasr   r   rq   Z convert_model_float32_to_float16Zsave_model_to_file)rR   rs   rt   ru   rv   rT   rq   rw   rx   ry   r|   mr'   r'   r(   optimize_onnx  s&    

zWhisperHelper.optimize_onnx   )	processorpt_modelr+   
batch_sizeprompt_modec              
      s2  zddl m} W nV tk
rf } z8tjd| dd d}td| d t| W 5 d }~X Y nX ddl m} |d	d
dd}g }	|dkr |d d d gddj}
n^ |d d d gddj |d d d gddjg}	t	|	|kst
t|	d |	d f}
d\}}}}d\}}|
|||||||ddd	}|rddg} fdd|D }g }g }t|D ]d}t|| |d< |	| ||d< |jf |   }|| | j|ddd  ql|
|d< |d= n8g }|jf |   } j|ddd g}t|}|d= |d= ||||fS )Nr   )load_datasetz.An error occurred while importing `datasets`: T)exc_infozpip install datasetszCCould not import `datasets`. Attempting to install `datasets` via `z`.z)hf-internal-testing/librispeech_asr_dummycleanZ
validation)r#   r   Zaudioarraypt)Zreturn_tensors   )   r   r   r   )      ?r   )	input_features
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyearly_stopping	use_cachezJohn has doubtszMaria has grave doubtsc                    s   g | ]}  |qS r'   )Zget_prompt_idsr]   pr   r'   r(   r_   k  s     zBWhisperHelper.pt_transcription_for_verify_onnx.<locals>.<listcomp>
prompt_idsr   Zskip_special_tokensr   r   )Zdatasetsr   	Exceptionrd   errorrm   r   systemr   lenrj   r6   catr9   rangeZ
from_numpygeneratedetachrr   numpyappendbatch_decodern   )r   r   r+   r   r   r   eZinstall_cmdZdsZinput_features_r   r   r   r   r   r   r   inputsZpromptsr   pt_transcription
pt_outputsiZ	pt_outputr'   r   r(    pt_transcription_for_verify_onnx<  sb    	
z.WhisperHelper.pt_transcription_for_verify_onnxr   r   c           
      C   sD   | dkr*|r*d}d}d}d}||||h}nd}d}d}	|||	h}|S )	Nr   z{ John has doubts whether Sir Frederick Layton's work is really Greek after all and can discover in it but little of Rocky Izy John has doubts whether Sir Frederick Latins work is really Greek after all and can discover in it but little of Rocky Iz Maria has grave doubts whether Sir Frederick Layton's work is really Greek after all and can discover in it but little of Rockyz Maria has grave doubts whether Sir Frederick Latins work is really Greek after all and can discover in it but little of Rocky IzX Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.zY Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.zZ "Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.r'   )
r   r   Z'expected_transcription_no_comma_prompt1Z)expected_transcription_misspelled_prompt1Z'expected_transcription_no_comma_prompt2Z)expected_transcription_misspelled_prompt2expected_transcription_optionsZexpected_transcription_no_commaZ!expected_transcription_with_commaZ+expected_transcription_with_quote_and_commar'   r'   r(   select_transcription_options  s*    z*WhisperHelper.select_transcription_options)r   r*   ort_sessionr+   r   r   c           %      C   s$  i }t tt dkr d|d< tj| fd|i||}tj| |d}tj| |d}	tj	|||||d\}
}}}|	j
g}|jddd	}ttd
d |}|| }ttdd | }ttdd | }tjtjtjtjtjtjd}d|k}t||D ]\}}|dkr0|
|    |
|< q |dkrVtj|	j|| d|
|< q |dkrtj||	jf|| d|
|< q |dkr@|s|r|gn|g}tj||| d|
|< ng }t|D ]}|||   qt dd |D }g }|D ],}||	j!g|t"|  }|||  qtj||| d|
|< n|dkrdtjdg|| d|
|< n|dkrtjddgg|| d|
|< nn|dkrt#tj|g|| d|d|
|< n@|dkrtjdg|| d|
|< ntj|
| g|| d|
|< q |$d|
d dddddf }|j%|dd}t&||} d}!t|D ]$}|!|| | kod|| | k9 }!qFd}"|!st|D ]v}|| j'|| j'kr|| || dddt"|| f  }#n|| ||  }#t |#( |#  t)d }$t |"|$}"q~|"dkr t*+d!|  t*+d"|  |"S )#zRCompare the result from PyTorch and ONNX Runtime to verify the ONNX model is good.rA   rB   rC   r*   )r*   r   englishZ
transcribe)languagetaskc                 S   s   | d S )Nr   r'   )tokenr'   r'   r(   <lambda>      z+WhisperHelper.verify_onnx.<locals>.<lambda>c                 S   s   | j S rU   )rh   entryr'   r'   r(   r     r   c                 S   s   | j S rU   )typer   r'   r'   r(   r     r   )ztensor(float)ztensor(float16)ztensor(int64)ztensor(int32)ztensor(int8)ztensor(uint8)Zextra_decoding_idsr   Z
vocab_mask)dtypeZprefix_vocab_maskZdecoder_input_idsc                 s   s   | ]}t |V  qd S rU   )r   r   r'   r'   r(   	<genexpr>  s     z,WhisperHelper.verify_onnx.<locals>.<genexpr>Zlogits_processorr   Zcross_qk_layer_headr   Ztemperaturer   NTr   )keyzPyTorch outputs: zONNX Runtime outputs: ),r	   rJ   rK   r   rL   r9   r   r
   r   r   rE   Zget_decoder_prompt_idsrn   map
get_inputsnpZfloat32float16Zint64Zint32Zint8Zuint8zipr   rr   r   ZonesZ
vocab_sizer   r   r   tolistmaxZpad_token_idr   repeatrunr   r   shapeminabsrd   rm   )%r   r*   r   r+   r   r   rO   r   r   rM   r   r   r   Zdecoder_prompt_idsZstart_idr   Zforced_decoder_idsZ	ort_namesZ
ort_dtypesZ	ort_to_npZuse_extra_decoding_idsrh   r   Zraw_input_idsZort_promptsr   max_lenZpadded_promptsr   Zpadded_promptZort_outputsZort_transcriptionr   Zparityro   diffZ
max_diff_ir'   r'   r(   verify_onnx  s    
	


 



$
 "
*
zWhisperHelper.verify_onnxN)r   F)Tr   )TFTF)rX   )FTFrr   )r   F)r   F)__name__
__module____qualname__staticmethodstrboolr)   r6   r+   nnModuler=   r   rP   r   r   r   r   r   rW   r   r   rq   intr~   r   r   r   r   r   r'   r'   r'   r(   r   +   s     &  :    % C    &  E   r   )*loggingr   pathlibr   typingr   r   r   r   r   r6   r   r   rZ   r   Z	optimizerr   	packagingr	   Ztransformersr
   r   r   r   rK   Zwhisper_decoderr   r   r   Zwhisper_encoderr   r   Zwhisper_encoder_decoder_initr   r   Zonnxruntimer   	getLoggerr   rd   ZPRETRAINED_WHISPER_MODELSr   r'   r'   r'   r(   <module>   s:   
