U
    T?hX                     @   sB  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZ	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlZe e!Z"e j#dd	d
Z$e j#dddZ%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-e!dkr>e-  dS )    N)measure_memorysetup_logger)get_library_path)ORTModelForSpeechSeq2Seq)ProfilerActivityprofilerecord_function)trange)AutoModelForSpeechSeq2SeqWhisperConfigWhisperProcessorargsc                    s   j dkrtd fdd fdd j j j j j jd} j dkr| D ],\}}t	j
|gd	|krxt	jnt	jd
||< qZ jrt	j
 jgt	jd
|d<  jrt	j
 jgt	jd
|d<  jrt	j
 jgt	jd
|d< td j  fdd}t | j | j} jr.||d< |S td  j dkrHdnd fdd}t || ||} j dkr||d< |S |j jrtjntj jd|d<  j|d< d|d< d|d<  jrֈ j|d< |S )N>   hf-pt-eagerorthf-orthf-pt-compilez/Unable to auto-detect inputs for provided modelc                     s   t  j} t | } | S N)whisperZ
load_audio
audio_pathZpad_or_trimaudior    c/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/whisper/benchmark.pyload_via_ffmpeg$   s    
z#get_inputs.<locals>.load_via_ffmpegc               	      s@   t  jd*} tjt|  tjd}t|g}W 5 Q R X |S )Nrbdtype)openr   npZasarraylistreadZuint8array)fr   r   r   r   load_via_numpy)   s    z"get_inputs.<locals>.load_via_numpy)
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   Zpenaltyr   decoder_input_idslogits_processortemperaturezLoad audio: c                    s   | r
 S   S r   r   )Zonnx_e2e)r   r$   r   r   <lambda>D       zget_inputs.<locals>.<lambda>audio_streamzFeature extraction: r   ptc                    s    j j| g jdjS )N)Zreturn_tensorssampling_rate)	processorZfeature_extractorr2   input_featuresr   )r   return_typer   r   r.   P   s     r4   )r   deviceinputsno_repeat_ngram_sizeTZearly_stopping	use_cacheZforced_decoder_ids)benchmark_type	Exceptionr%   r&   r'   r(   r)   r*   itemsr   r"   float32Zint32has_decoder_input_idsr+   has_logits_processorr,   has_temperaturer-   loggerinfor   time_fnhas_audio_streamtouse_fp16torchfloat16target_devicer8   )r   r7   kvZload_audio_fnZ
audio_dataZprocessor_fnr4   r   )r   r   r$   r5   r   
get_inputs    sZ    

&

 


rL   c                 C   s  d\}}d\}}| j dkrx| jr&| jn| j}t }tj|| jrFtjntj	dd
| j}t }| j dkrt|}nd| j dkrt }| j|_|t  | jrd|_d|_| jrtd td ntd	| j  | j d
krLt| jtkr| jd n| j}t| jtkr"| jd nd }t }tj| j|||dd}t }| j dkrt d| j!  t }tj"| j!|| jgd}t }t d||  d |S )N)NN   r   r   T)Ztorch_dtyper9   r   >   r   r      r   Cannot recognize r   )providerprovider_optionsZsession_optionsZuse_io_bindingr   zLoading model from )	providerszLoaded model in  s)#r:   Zhf_pt_model_path
model_nametimer
   from_pretrainedrF   rG   rH   r=   rE   rI   compiler   ZSessionOptionsr   Zenable_profilingZregister_custom_ops_libraryr   verboseZlog_verbosity_levelZlog_severity_leveltuneZset_default_logger_severityZset_default_logger_verbosityr;   typeexecution_providertupler   hf_ort_dir_pathrA   rB   ort_model_pathZInferenceSession)r   modelZsess_options
start_timeend_timesourcerP   rQ   r   r   r   	get_modelh   sb    



 rc   c                 C   sX  t |tkr|d n|}t |tkr,|d n|}t| j}| jdkrPt| jnt| jt	j
dd}| jrz||}t| |D ]}|| q~| jdkrtj| t }	| jdkrt| jnt| jt	j
dd}
|
D ]}|| q| jdkrtj| t }| jdkrtd d}||	 | j }|| }td	| d
 td| d d S )Nr   rN   r   zWarm up)filedesccpuZ	Benchmark z	Latency: rS   zThroughput: z qps)rZ   r\   rG   r6   rI   r:   rangeZwarmup_runsr	   sysstdoutrX   rA   rB   cudaZsynchronizerU   Znum_runs)r   fnr7   Zwarmup_inputsZbenchmark_inputsZtorch_deviceZwarmup_rangeoutputs_r`   Zbench_rangera   Z
batch_sizeZlatencyZ
throughputr   r   r   rC      s@    





rC   c           	      C   s   | j   d| j d| j d|jdd d| dtj d}d }| j dkrtt	j
t	jgddd"}td || W 5 Q R X W 5 Q R X |jdd	j| j| jd
}tj| j| d}t|d}|| W 5 Q R X n|| | d}|S )N-rn   z%Y-%m-%d_%H:%M:%SrM   T)Z
activitiesZrecord_shapesZprofile_memoryZmodel_inference   )Zgroup_by_stack_n)Zsort_byZ	row_limitz.logw.json)r:   lower	precisionr6   __name__replacedatetimenowr   r   ZCPUCUDAr   Zkey_averagestableZpt_filter_byZpt_num_rowsospathjoin
log_folderr   write)	r   rl   r7   Zinputs_typeprefixfilenameZprofZ	prof_datar#   r   r   r   
profile_fn   s$    B

  

r   c                    s   t  }t|}|jdd   td|jd d d t  t	j
  t| jdk fdd| jd tj  d S )	Ng?)intervalzCPU usage: %rf   c                      s    S r   r   r   rl   r7   r   r   r.     r/   zmeasure_fn.<locals>.<lambda>)Zis_gpufuncmonitor_type)r{   getpidpsutilProcessZcpu_percentrA   rB   gcZcollectrG   rk   Zempty_cacher   r6   r   ri   rj   flush)r   rl   r7   pidprocessr   r   r   
measure_fn   s    

 r   c           
         s  fdd fdd}|} j dkr0||  jrVt ||d} j dkrR|d td  }jj }|d	 }tj	|rt
d
| d|  t|tj j| jj }|d }tj	|rt
d
| d|  t|tj j| jj }|d }tj	|rRt
d
| d|  t|tj j| d S t
d t || ||\}}	t
dt|d  d t
d|	d   t || d S )Nc                    s    j f | }|S r   )generate)r7   predicted_idsr_   r   r   get_pred_ids  s    z&run_hf_inference.<locals>.get_pred_idsc                    s>   | }g }t  jD ]}| jj|ddd  q||fS )NTZskip_special_tokensr   )rh   r(   appendr3   batch_decode)r7   r   transcriptionrn   )r   r   r   r   gen_and_dec  s
    z%run_hf_inference.<locals>.gen_and_decr   zgen-and-decr   rr   z-encoder.json	Renaming  to z-decoder.jsonz-decoder-with-past.jsonz
Evaluating PyTorch...Generated token length: r    tokensTranscription: )r:   r   r   lenencodersessionend_profilingr{   r|   isfilerA   warningrenamer}   r~   decoderZdecoder_with_pastrB   rC   r   )
r   r7   r_   r   generate_fnnew_logname
new_prefixold_lognamer   r   r   )r   r   r_   r   run_hf_inference  s>    

r   c                    sl  d fdd	}fdd}fdd} fdd	} j d
krB|n|}||} jrt ||d}	 }
td|
 d|	  t|
tj	 j
|	 d S td |} jr||dd}||f}t || ||} j d
kr| }|d } jrtd|d d   nH||d d }tdt| d  jj|d ddd }t|  t || d S )NFc                    s   t tdd  }t |  }|| }t|rJtd|  td|r` jr`| d | d< || }t|r|D ]}t	d| d | |= qt j
d	kr }|  D ]\}}	|||	 q D ]}
|j|
j j
 jd
 q|S | S )Nc                 S   s   | j S r   nameZmodel_inputr   r   r   r.   N  r/   z?run_ort_inference.<locals>.prepare_ort_inputs.<locals>.<lambda>z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.r%   r&   zRemoving unnecessary input 'z' from user provided inputsrf   )Zdevice_type	device_id)setmaprL   keysr   rA   errorr;   rY   rB   r6   
io_bindingr<   Zbind_cpu_inputget_outputsZbind_outputr   r   )r7   warmupZmodel_inputsZuser_inputsZmissing_inputsZunnecessary_inputsZunnecessary_inputr   rJ   rK   outputr   r_   r   r   prepare_ort_inputsL  s*    

z-run_ort_inference.<locals>.prepare_ort_inputsc                    s     |  | S r   )Zrun_with_iobinding)r   r   r   r   with_io_bindingj  s    
z*run_ort_inference.<locals>.with_io_bindingc                    s     d | }|S r   )run)r7   rm   r   r   r   without_io_bindingo  s    z-run_ort_inference.<locals>.without_io_bindingc                    s6    j | kr2t|  j kd d }| d |d  S | S )Nr   rN   )eos_token_idr   where)r   Z	first_endr   r   r   handle_outputt  s    
z(run_ort_inference.<locals>.handle_outputrf   Ze2er   r   z
Evaluating ONNX Runtime...T)r   r   r   r   r   r   )F)r6   r   r   r   rA   r   r{   r   r|   r}   r~   rB   rY   rC   Zcopy_outputs_to_cpurD   r   r3   r   printr   )r   r7   r_   r   r   r   r   r   Z
ort_inputsr   r   Zort_evaluate_inputsZort_warmup_inputsZort_outputsZactual_outputr   r   r   r   run_ort_inferenceK  s:    


r   c                 C   sD   | j dkrt| || n(| j dkr0t| || ntd| j  d S )N>   r   r   r   r   rO   )r:   r   r   r;   )r   r7   r_   r   r   r   run_inference  s
    

r   c               	   C   s  t  } | jddtdddddgd | jd	d
tddd | jddtdddddgdd | jdtddd | jdtddd | jdtddd | jddtddd | jdd ttj rd!nd"d"d!d#gd$ | jd%d&td'd( | jd)d*td+d( | jd,d-td.d( | jd/td0d( | jd1td2d3d | jd4td5d( | jd6td'd( | jd7td8d( | jd9td8d( | jd:td;d( | jd<td;d( | jd=td>d( | jd?td@dAd | jdBtd8dCd | jdDtd;dEd | jdFdGdHdI | jdJtdKdLd | jdMtdNdOd | jdPdGdHdI | jdQtt	j
dRdSd | jdTdGdHdUdV |  }tj|j t|j |j|_d|jkr|j  dW|_|jdXkr|jdY|jif|_n4|jdZkr|j|jd8|jrd8nd'd[f|_d!|_|jdkr|jstd\|jdkr
|js
td]t|j|_|S )^Nz-btz--benchmark-typeTr   r   r   r   )rZ   requiredchoicesz-mz--model-namez;Hugging Face name of model (e.g. 'openai/whisper-large-v2'))rZ   r   helpz-pz--precisionZfp32Zint8fp16zePrecision for model. For ONNX models, the model's precision should be set before running this script.)rZ   r   defaultr   r   z--hf-pt-model-pathrg   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))rZ   r   r   z--hf-ort-dir-pathzaPath to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-az--audio-pathz%Path to audio file for E2E evaluationz-dz--devicerk   rf   Zrocm)rZ   r   r   z-idz--device-idr   )rZ   r   z-wz--warmup-runsrp   z-nz
--num-runs
   z--seed   z--sampling-ratei>  zSampling rate for audio (in Hz)z--max-lengthi  z--min-lengthz--num-beamsrN   z--num-return-sequencesz--length-penaltyg      ?z--repetition-penaltyz--no-repeat-ngram-size   z--decoder-input-idsz[]zThe forced decoder ids for generation. Format is [start token, timestamp token, language token, task token]. Default is [start token]. See `decoder_input_ids` in https://github.com/microsoft/Olive/tree/main/examples/whisper for details.z--logits-processorzLWhether to use timestamps logits processor or not (0 for false, 1 for true).z--temperaturez!Temperature value for generation.z	--profileF
store_true)r   actionz--pt-filter-byZself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--tunezFOnly used by ROCm EP, enable TunableOp tuning to select fastest kernel)r   r   r   ZExecutionProviderZCUDAExecutionProviderr   ZROCMExecutionProvider)r   Ztunable_op_enableZtunable_op_tuning_enablez,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path`)argparseArgumentParseradd_argumentstrrG   rk   Zis_availableintfloatr{   r|   r}   
parse_argsr   randomseedZmanual_seedr6   r   r:   upperr[   r   rY   r]   AssertionErrorr^   astliteral_evalr+   )parserr   r   r   r   r     s    

   r   c                  C   sF  t  } t| j t| j dtjj_	t
| j}t| j}| jdkrTd| j n| j}| jdk}t| d| t| d| t| d| t| dd	 t| d
|j td| j  t| }| jdkr.ttdd | }d|k| _t| dd|k t| dd|k t| dd|k | jg kr.|jg| _t| }t| || d S )NTrf   zcuda:r   r3   rI   rF   rD   Fr   zForced decoder prompt ids: r   c                 S   s   | j S r   r   r   r   r   r   r.   T  r/   zmain.<locals>.<lambda>r0   r>   r+   r?   r,   r@   r-   )r   r   rX   rA   rB   __dict__rG   backendsZcudnnZ	benchmarkr   rV   rT   r   r6   r   rt   setattrr   r+   rc   r:   r   r   rL   rD   Zdecoder_start_token_idr   )r   configr3   rI   rF   r_   Zort_model_inputsr7   r   r   r   main=  s2    




r   __main__).r   r   rw   r   loggingr{   ri   rU   numpyr   r   rG   r   Zbenchmark_helperr   r   Zonnxruntime_extensionsr   Zoptimum.onnxruntimer   Ztorch.profilerr   r   r   Ztqdmr	   Ztransformersr
   r   r   Zonnxruntimer   	getLoggerru   rA   	NamespacerL   rc   rC   r   r   r   r   r   r   r   r   r   r   r   <module>   s@   
HC1@X	 $
