U
    T?hGX                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZeeZG d	d
 d
eZG dd deZ G dd dZ!dej"iZ#ddddddi fddZ$d8ddZ%d9ddZ&dd Z'dd Z(dd Z)dd  Z*d:d!d"Z+ej,d fd#d$Z-d%d& Z.d;d(d)Z/eeee0ef   d*d+d,Z1G d-d. d.eZ2G d/d0 d0e2Z3G d1d2 d2e2Z4d<d4d5Z5d6d7 Z6dS )=    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)AnyDictListOptional)versionc                   @   s$   e Zd ZdZdZdZdZdd ZdS )	PrecisionZfp32Zfp16Zint8Zint4c                 C   s   | j S Nvalueself r   [/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/benchmark_helper.py__str__&   s    zPrecision.__str__N)__name__
__module____qualname__ZFLOAT32ZFLOAT16ZINT8ZINT4r   r   r   r   r   r       s
   r   c                   @   s    e Zd ZdZdZdZdd ZdS )OptimizerInfoZno_optZby_ortZ	by_scriptc                 C   s   | j S r   r   r   r   r   r   r   1   s    zOptimizerInfo.__str__N)r   r   r   ZNOOPTZBYORTZBYSCRIPTr   r   r   r   r   r   *   s   r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )ConfigModifierc                 C   s
   || _ d S r   
num_layers)r   r   r   r   r   __init__6   s    zConfigModifier.__init__c                 C   s~   | j d krd S t|dr2| j |_td| j   t|drV| j |_td| j   t|drz| j |_td| j   d S )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r   hasattrr   loggerinfor   Zdecoder_layers)r   configr   r   r   modify9   s    



zConfigModifier.modifyc                 C   s   | j S r   r   r   r   r   r   get_layer_numF   s    zConfigModifier.get_layer_numN)r   r   r   r   r$   r%   r   r   r   r   r   5   s   r   float32TFc	                    sL  d }	zt  }
|r t jj|
_n
t jj|
_|r4d|
_|dkrT||
_t	d|
j  |r`d|
_
nd|
_
t	d|   |r|dkrddg}q|d	krd
dg}q|dkrdd
dg}q|dkrddg}q|dkrdddg}qddg}ndg} r fdd|D }|r|
dd t j| |
|d}	W n$ tk
rF   tjddd Y nX |	S )NTr   z%Session option: intra_op_num_threads=   zCreate session for onnx model: dmlDmlExecutionProviderZCPUExecutionProviderrocmROCMExecutionProviderZmigraphxMIGraphXExecutionProvidercudaCUDAExecutionProviderZtensorrtZTensorrtExecutionProviderc                    s$   g | ]}| kr| | fn|qS r   r   ).0nameprovider_optionsr   r   
<listcomp>   s     z.create_onnxruntime_session.<locals>.<listcomp>z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providers	Exception)exc_info)onnxruntimeZSessionOptionsZGraphOptimizationLevelZORT_ENABLE_ALLZgraph_optimization_levelZORT_ENABLE_BASICenable_profilingZintra_op_num_threadsr!   debugZlog_severity_levelZadd_session_config_entryZInferenceSessionr7   error)Zonnx_model_pathuse_gpuproviderZenable_all_optimizationnum_threadsr:   verboseZ(enable_mlas_gemm_fastmath_arm64_bfloat16r3   sessionZsess_optionsr6   r   r2   r   create_onnxruntime_sessionP   sT    




rB   c                 C   s6   | rt jddd nt jdd tdtj d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(message)s)rE   transformers)coloredlogsinstalllogging	getLoggersetLevelWARNING)r@   r   r   r   setup_logger   s    rM   c                 C   s   | rt j| st |  |r4t j|s4t | |rv|dkrVdt ksvtdn tt dddgrvtdt	
dtj  t	
d	tj  t	
d
tj  ttjtdkstttjtdkstttjtdkstd S )Nr)   r*   zBPlease install onnxruntime-directml package to test GPU inference.r/   r,   r-   zWPlease install onnxruntime-gpu package, or install ROCm support, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsr9   Zget_available_providersAssertionErrorset
isdisjointr!   r"   torch__version__rF   r   parse)	cache_dir
output_dirr=   r>   r   r   r   prepare_environment   s*    


rZ   c                 C   s   t | tt|  d }tj| tjdd }|d|  }t| |dt| dd dt| dd dt| dd d|d|ddS )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarZfloat64Z
percentile)latency_list
batch_sizeZ
latency_msr`   Z
throughputr   r   r   get_latency_result   s    rm   c                 C   s   t |dddd^}ddddd	d
dddddddddddddddg}tj||d}|  | D ]}|| qZW 5 Q R X td|  d S )Na asciimodenewlineencodingenginer   r6   device	precision	optimizer
io_binding
model_nameinputsthreadsrl   sequence_lengthcustom_layer_numr   r_   re   rd   r`   ra   rb   rc   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr!   r"   )resultscsv_filenamecsv_filecolumn_names
csv_writerresultr   r   r   output_details   s8    r   c                    s  t |dddd}ddddd	d
dddddg g }|jD ]D}|jdgkrZ|d|  q8|jD ]}|d| d|  q`q8tj| | d}|  |jD ]0}dD ]$}	|jD ]}
dD ]
}|j	D  ]}i }| D ]}|d |kr|d |	kr|d |
kr|d |kr|d |kr؇ fdd|
 D }|sT|| |dd |D  n" D ]}|| || ksXtqX|d }|d }|r|d |d| d| < q|d |d| < q|r|| qqqqqW 5 Q R X td|  d S )Nrn   ro   rp   rq   rz   r{   r~   ru   r   r6   rv   rw   rx   ry   r|   bZ_sr   )         )TFro   c                    s   i | ]\}}| kr||qS r   r   )r0   kvheader_namesr   r   
<dictcomp>  s       z"output_summary.<locals>.<dictcomp>c                 S   s   i | ]
}|d qS )ro   r   )r0   r   r   r   r   r     s      rl   r}   rd   z'Summary results are saved to csv file: )r   Zbatch_sizesZsequence_lengthsappendr   r   r   modelsZenginesr?   itemsupdaterR   r   r!   r"   )r   r   argsr   Z
data_namesrl   r}   r   rz   Zinput_countZengine_namery   r|   rowr   headersr   r   sr   r   r   output_summary   sh    









r   c              	   C   s   t |dddd}dttt|   }tj||d	}|  | D ]N}t	t
 | | d< tj| | d< tj| | d< || | d< || |  qHW 5 Q R X td
|  d S )Nrn   ro   rp   rq   model_filenamer   rF   rU   r   z(Fusion statistics is saved to csv file: )r   r   rF   rU   )r   listnextitervalueskeysr   r   r   strr   nowrF   rV   rU   r   r!   r"   )Zmodel_fusion_statisticsr   r   r   r   keyr   r   r   output_fusion_statistics)  s"        r   c                    sd   i }t j fddd|d t j fddd|d}|| |ddi |t|| |S )Nc                      s    d  S r   runr   
ort_inputsort_sessionr   r   <lambda>?      zinference_ort.<locals>.<lambda>r   numberrepeatc                      s    d  S r   r   r   r   r   r   r   @  r   ry   F)timeitr   r   rm   )r   r   result_templaterepeat_timesrl   warm_up_repeatr   rk   r   r   r   inference_ort=  s    
r   c              
      s  i }   |D ]L}t|| |	}tt|| j|
} ||j	j
d||j|  qt|dkrvt|||	 t|D ]4\}} ||| j	j
dtj|| j||   q~tj fddd|d tj fddd|d}|| |ddi |t|| |S )	Nr   c                      s
     S r   Zrun_with_iobindingr   ry   r   r   r   r   t  r   z/inference_ort_with_io_binding.<locals>.<lambda>r   r   c                      s
     S r   r   r   r   r   r   r   z  r   ry   T)ry   rU   Z
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r[   Z
bind_inputrv   typeshapeZdata_ptrrh   allocateOutputBuffers	enumerateZbind_outputri   r&   r   r   r   rm   )r   r   r   r   Zort_output_namesZort_outputsoutput_buffersoutput_buffer_max_sizesrl   rv   Z	data_typer   r   r1   Znp_inputZ
input_typeiZort_output_namerk   r   r   r   inference_ort_with_io_bindingG  sL    	

	
r   c                 C   s&   |D ]}|  tj|tj|d qd S )N)r[   rv   )r   rU   emptyr&   )r   r   rv   r   r   r   r   r     s    r   {   c                 C   s<   t |  tj |  t|  tj|  tj|  dS )z5Set random seed manually to get deterministic resultsN)randomseedri   rU   Zmanual_seedr.   Zmanual_seed_all)r   r   r   r   set_random_seed  s
    

r   returnc               
   C   s   ddl m} m}m}m}m}m}m} z||  g }| }t|t	sFW d S t
|D ]F}	|||	}
t|
trp W d S ||	|||	|
j|
j|
jd qN|  |W S  | k
r } ztd| W Y d S d }~X Y nX d S )Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idr1   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr   r   r   r   r   r   r   
isinstanceintranger   r   r   r   r   print)r   r   r   r   r   r   r   r   device_countr   r"   r<   r   r   r   get_gpu_info  s0    $



	
r   c                   @   s@   e Zd Zd
ddZdd Zeeeee	e
f   dddZd	S )MemoryMonitorTc                 C   s
   || _ d S r   )keep_measuringr   r   r   r   r   r     s    zMemoryMonitor.__init__c                 C   s@   dd l }d}t||t  jd }td | jsq<q|S )Nr      {Gzt?)	psutilmaxProcessrN   getpidZmemory_infoZrssr   r   )r   r   	max_usager   r   r   measure_cpu_usage  s    zMemoryMonitor.measure_cpu_usager   c                 C   s
   t  d S r   )NotImplementedErrorr   r   r   r   measure_gpu_usage  s    zMemoryMonitor.measure_gpu_usageN)T)r   r   r   r   r   r   r   r
   r	   r   r   r   r   r   r   r   r     s   
r   c                       s<   e Zd Zd fdd	Zeeeeef   dddZ	  Z
S )CudaMemoryMonitorTc                    s   t  | d S r   )superr   r   	__class__r   r   r     s    zCudaMemoryMonitor.__init__r   c           
   
      sD  ddl m}m}mm}mm}m} g g  z|  | }t|t	sZt
d|  W d S dd t|D fddt|D  t|D ]J}||}t|trt
d|   W d S t| |jd |< qtd	 | jsqq|   fd
dt|D W S  |k
r> }	 zt
d|	 W Y d S d }	~	X Y nX d S )Nr   r   z*nvmlDeviceGetCount result is not integer: c                 S   s   g | ]}d qS r   r   r0   r   r   r   r   r4     s     z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>c                    s   g | ]} |qS r   r   r   )r   r   r   r   r4     s     z%nvmlDeviceGetMemoryInfo returns str: r   r   c                    s    g | ]}| | | d qS )Z	device_idr1   max_used_MBr   r   gpu_namemax_gpu_usager   r   r4     s
   r   )r   r   r   r   r   r   r   r   r   r   r!   r<   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r"   r<   r   )r   r   r   r   r   r     s6    $


z#CudaMemoryMonitor.measure_gpu_usage)T)r   r   r   r   r   r
   r	   r   r   r   __classcell__r   r   r   r   r     s   r   c                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	RocmMemoryMonitorTc                    sn   t  | d}tj|r2|tjkr2tj| zdd l}|| _| j  W n t	k
rh   d | _Y nX d S )Nz/opt/rocm/libexec/rocm_smir   )
r   r   rN   rO   rP   sysr   rocm_smiZinitializeRsmiImportError)r   r   Zrocm_smi_pathr   r   r   r   r     s    
zRocmMemoryMonitor.__init__c                 C   s(   | j d krdS | j |dd d d S )Nr'   ZVRAMr   i   )r   Z
getMemInfo)r   devr   r   r   get_used_memory  s    
z!RocmMemoryMonitor.get_used_memoryc                    s   | j d krd S | j d k	r&t| j  nd}dd t|D dd t|D  t|D ]}t| | ||< qVtd | jsNqqN fddt|D S )Nr   c                 S   s   g | ]}d qS r   r   r   r   r   r   r4     s     z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>c                 S   s   g | ]}d | qS )ZGPUr   r   r   r   r   r4     s     r   c                    s    g | ]}| | | d qS r   r   r   r   r   r   r4   %  s
   )	r   rh   ZlistDevicesr   r   r   timer   r   )r   r   r   r   r   r   r     s    

z#RocmMemoryMonitor.measure_gpu_usage)T)r   r   r   r   r   r   r   r   r   r   r   r     s   r   r.   c              
   C   s  d }|dkrt }nt}|d}| rH|d k	r2|}n| }|d krFd S |d krR|S t }| }||j}z||}
|
 }W 5 d|_| }	X |	d krW 5 Q R  d S td| d|	  t	|dkr:t	|	dkr:t	|t	|	kr:d}t
|D ].\}}|d }|	| d }|| }t||}q|W  5 Q R  S W 5 Q R X d S |d k	rX|}n| }|d krn|S t l}| }||j}z||}
|
 }W 5 d|_| }	X td|d	d
|	d	d |	| W  5 Q R  S Q R X d S )Nr+   FzGPU memory usage: before=z  peak=r   r   r   zCPU memory usage: before=z.1fz
 MB, peak=z MB)r   r   r   r   Zsubmitr   r   r!   r"   rh   r   r   r   )Zis_gpufuncZmonitor_typeZstart_memoryZmemory_monitor_typeZmonitorZmemory_before_testexecutorZ
mem_threadr   Z	fn_thread_Zmax_usedr   Zmemory_beforebeforeafterr   r   r   r   measure_memory/  s`    

.



r  c                  C   sV   dddddddg} d}| D ]6}t |}|d kr2q|r>|d	7 }|| d
| 7 }q|S )NZORT_DISABLE_FUSED_ATTENTIONZ!ORT_ENABLE_FUSED_CAUSAL_ATTENTIONZ!ORT_DISABLE_FUSED_CROSS_ATTENTIONZORT_DISABLE_TRT_FLASH_ATTENTIONZ&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONZORT_TRANSFORMER_OPTIONSZORT_CUDA_GEMM_OPTIONSro   ,=)rN   getenv)Z	env_namesenvr1   r   r   r   r   get_ort_environment_variabless  s"    	
r
  )T)N)r   )r   )r.   N)7r   rI   rN   r   r   r   r   abcr   r   concurrent.futuresr   r   enumr   r   typingr   r	   r
   r   rG   ri   rU   rF   	packagingr   r9   rJ   r   r!   r   r   r   r&   r   rB   rM   rZ   rm   r   r   r   r   Zlonglongr   r   r   r   r   r   r   r   r  r
  r   r   r   r   <module>   sf   

 	
F

":

=
&2+
D