U
    T?hO                     @   s>  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlmZmZ e	G dd dZe	G d	d
 d
Zd'ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Z dd Z!d d! Z"d"d# Z#d$d% Z$e%d&kr:dZ&e$  dS )(    N)	dataclass)datetime)Path)Optional)generate_test_dataget_bert_inputsc                   @   sv   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr r   r   Y/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/bert_perf_test.pyr   "   s   
r   c                   @   sV   e Zd ZU eed< eed< eed< eed< eed< ee ed< ee ed< eed< d	S )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelinput_tuning_resultsoutput_tuning_results	mask_typeN)r   r   r   r   r   r   r   r   r   r   r   r   3   s   
r      c              	   C   sZ  dd l }|| |r*d| kr*td |r|dkr@ddg}q|dkrRddg}q|d	krfd
ddg}q|dkrxddg}q|dkrdddg}qddg}ndg}| }	||	_|jj|	_|d kr|j	j
|	_nZ|dkr|j	j|	_nF|dkr|j	j|	_n2|dkr|j	j|	_n|dkr|j	j
|	_n||	_|d k	r2||	_|j| |	|d}
|r|dkrfd|
 kstn|dkrd|
 kstn|d	krd
|
 kstd|
 kstn`|dkrd|
 kstnB|dkrd|
 kstd|
 kstnd|
 ks(tnd|
 ks(t|d k	rVt|}|
t| W 5 Q R X |
S )Nr   ZCUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.ZdmlZDmlExecutionProviderZCPUExecutionProviderZrocmZROCMExecutionProviderZmigraphxZMIGraphXExecutionProvidercudaZtensorrtZTensorrtExecutionProvider   r(   c   )	providers)onnxruntimeZset_default_logger_severityZget_available_providersprintZSessionOptionsZlog_severity_levelZExecutionModeZORT_SEQUENTIALZexecution_modeZGraphOptimizationLevelZORT_ENABLE_ALLgraph_optimization_levelZORT_DISABLE_ALLZORT_ENABLE_BASICZORT_ENABLE_EXTENDEDr   ZInferenceSessionZget_providersAssertionErroropenZset_tuning_resultsjsonload)r    r   r   r   r/   r   tuning_results_pathr-   Zexecution_providerssess_optionssessionfr   r   r   create_session?   sz    	















r8   c                 C   s,   t jtjt jtjt jtjt jtji}||  S )N)torchZfloat32npZfloat16Zint32Zint64Zlonglong)Z
torch_typeZtype_mapr   r   r   
numpy_type   s        r;   c                    s4    fdd|   D } fdd|  D }||fS )Nc                    s"   i | ]\}}|t | qS r   r9   Z
from_numpyto.0namearraydevicer   r   
<dictcomp>   s      z/create_input_output_tensors.<locals>.<dictcomp>c                    s"   i | ]\}}|t | qS r   r<   r>   rB   r   r   rD      s      )items)inputsoutputsrC   input_tensorsoutput_tensorsr   rB   r   create_input_output_tensors   s    rJ   c              
   C   sx   |   }| D ],\}}|||jjdt|j|j|  q| D ],\}}|	||jjdt|j|j|  qF|S Nr   )

io_bindingrE   Z
bind_inputrC   typer;   ZdtypeshapeZdata_ptrZbind_output)sessrH   rI   rL   r@   Ztensorr   r   r   create_io_binding   s(    rP   c                 C   s   g }g }|j rdnd}t|D ]\}}| ||}	||	 i }
tt|D ]}|	| |
|| < qLt||
|\}}t| ||}| | t	
 }| | t	
 | }|| q||fS )Nr)   cpu)r   	enumeraterunappendrangelenrJ   rP   Zrun_with_iobindingtimeitdefault_timer)r6   
all_inputsoutput_namestest_settingresultslatency_listrC   _test_case_idrF   resultrG   irH   rI   rL   
start_timelatencyr   r   r   %onnxruntime_inference_with_io_binding   s"    


rc   c           
      C   st   t |dkr| |t| g }g }t|D ]<\}}t }| ||}t | }	|| ||	 q.||fS rK   )rV   rS   randomchoicerR   rW   rX   rT   )
r6   rY   rZ   r\   r]   r^   rF   ra   r_   rb   r   r   r   onnxruntime_inference   s    
rf   c                 C   s   |  }dtj|  d}|d|j d|j ddd7 }|d|j d|j d7 }|d	|j	 d
|j
 d7 }|d|j d|j d7 }|d|j d7 }|d|j 7 }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)Zget_session_optionsospathbasenamer/   r   replacer	   r
   r   r   r   r   r   r   )r    r6   r[   r5   optionr   r   r   	to_string   s     rn   c              	   C   s  t | j|j|j|| j|j| jd}dd | D }t| j||}||krXt	d| d S t	d| g }|j
rt|jD ] }	t||||\}
}|| qvn*t|jD ]}	t|||\}
}|| qt|d }t|}t|d}t|d}t|d	}t|d
}t|d}|jd|  }|||||||f||< t	dt|dt|d | jrtj| j}tj|r|}|ddd  dt   d}t	d|d|d |  }t!|d}t"#|| W 5 Q R X t	d| d S )N)r   r4   c                 S   s   g | ]
}|j qS r   )r@   )r?   outputr   r   r   
<listcomp>  s     z run_one_test.<locals>.<listcomp>zskip duplicated test:zRunning test:  2   K   Z   _   r+   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr*   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$r8   r    r   r   r$   r   r%   get_outputsrn   r.   r   rU   r   rc   extendrf   r:   rA   
statisticsZmeanZ
percentiler	   formatr&   ri   rj   abspathexistsrsplitr   now	timestampZget_tuning_resultsr1   r2   dump)model_settingr[   perf_resultsrY   r   r6   rZ   keyZall_latency_listZ_ir\   r]   Z
latency_msZaverage_latencyZ
latency_50Z
latency_75Z
latency_90Z
latency_95Z
latency_99Z
throughputZoutput_pathZold_output_pathZtrsr7   r   r   r   run_one_test   sr    	

   

$r   c                 C   s,   t jt| ||||fd}|  |  d S )N)targetargs)multiprocessingProcessr   startjoin)r   r[   r   rY   r   processr   r   r   launch_test<  s    
r   c           	      C   s   |j d k	r t| ||||j  d S tjdd}tjdd}t||h}tdtd|D ]}||krT|| qT|jdd |D ]}t| |||| q|d S )NF)ZlogicalTr*      )reverse)	r   r   psutil	cpu_countlistrU   minrT   sort)	r   r[   r   rY   r   Zlogical_coresZcandidate_threadsr`   r   r   r   r   run_perf_testsK  s$    
r   c                 C   s|   t | j| j| j| j\}}}td|j d|j d|j  t	|j|j|j|j
|j||||j|j| jd}t| ||| d S )NzGenerating z samples for batch_size=z sequence_length=)r'   )r   r    r!   r"   r#   r.   r   r	   r
   r   r   r   r   r   r'   r   )r   r[   r   Z	input_idsZsegment_idsZ
input_maskrY   r   r   r   run_performancec  s.    
r   c               
   C   s  t  } | jddtdd | jdddtddd	 | jd
ddtdd | jddtddd | jdddtddd | jddtddddgddd | jddtddd | jddd d!d" | jdd# | jd$dtdddddd%gd&d' | jd(dd d)d" | jdd* | jd+dd d,d" | jdd- | jd.dtd d/d | jd0d1dtd d2d | jd3dtd d4d | jd5dtd d6d | jd7dtd d8d | jd9d td:d; | jd<d td=d; | jd>d?d@tdAd; | jdBdCdd dDd" | jddE | jdFdtddGd |  }|S )HNz--modelTzbert onnx model path)requiredrM   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rM   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rM   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_levelr*   r(   r+   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rM   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severity   z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rM   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rM   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   r   r   r   parse_arguments  s   		
			r   c                  C   s  t  } | jdkr&tdtd| j | _| jdkr8| j| _t }|	 }t
| j}t|dkrjt|dksrtdt| j| j| j| j| j| j| j| j}|D ]R}t|| j| j| j| j| j| j| j| j| j| j| j| j}t d| t!||| qt"|# ddd	 d
}t$j%&t'| jj(d)| jr&dndd&dd t"t*|D | jt+, -d}t.|ddd}	t/j0|	ddd}
d }|D ]z\}}|1d}|d krdddddddg}|2d d |D  |
3| d!d |D }|2d"d |D  |
3| qW 5 Q R X t d#| d S )$Nr   r*   rq      z batch_size not in range [1, 128]ztest settingFc                 S   s   | d S )Nr*   r   )xr   r   r   <lambda>R      zmain.<locals>.<lambda>)r   r   zperf_results_{}_B{}_S{}_{}.txtZGPUZCPU-c                 S   s   g | ]}t |qS r   )r   r?   r   r   r   r   rp   X  s     zmain.<locals>.<listcomp>z%Y%m%d-%H%M%Szw+rh   )newline	
)	delimiterlineterminatorrg   zLatency(ms)ZLatency_P50ZLatency_P75ZLatency_P90ZLatency_P95ZLatency_P99zThroughput(QPS)c                 S   s   g | ]}| d d qS )=r   splitr   r   r   r   rp   l  s     c                 S   s   g | ]}t |d qS )rv   )r|   r   r   r   r   rp   o  s     c                 S   s   g | ]}| d d qS )r   r*   r   r   r   r   r   rp   p  s     zTest summary is saved to)4r   r   maxr   Zsamplesr   r
   r   Managerdictsetr	   r   	Exceptionr   modelr!   r"   r#   r$   r%   r&   r'   r   r   r   r   r   r   r   r   r   r.   r   sortedrE   ri   rj   r   r   parentr|   r   r   r   strftimer1   csvwriterr   rz   writerow)r   managerr   Zbatch_size_setr   r	   r[   Zsorted_resultsZsummary_fileZtsv_fileZ
tsv_writerheadersr   Zperf_resultparamsvaluesr   r   r   main"  s    




	

	
r   __main__)Nr(   N)'r   r   r2   r   ri   rd   r{   rW   dataclassesr   r   pathlibr   typingr   numpyr:   r   r9   Zbert_test_datar   r   r   r   r8   r;   rJ   rP   rc   rf   rn   r   r   r   r   r   r   r   __spec__r   r   r   r   <module>   sN      
Y
E #T
