U
    T?h=E                     @   s@  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZmZ d dlmZ edZd5ddZG d	d
 d
Zdd Zdd Zdd Zdd Zdd ZedddZdd ZddgfedddZdd  Z d!d" Z!d#d$ Z"d%d& Z#edd'd(Z$e%d)kr<e Z&ee&j' e&j(d*k s`e&j)d+k s`e&j(e&j) d,k rje*d- ej+,e&jre&j-se&j.se/d.e&j d/ne0d0e&j e1e&j ee&j(e&j)e&jZ2e&j-se$e2e& zee2j3Z4W n. e5k
r   e6d1e2j3  e2j7Z4Y nX e0d2 e2j38d3d4Z9ee4e9 dS )6    N)get_ort_environment_variablessetup_logger)main)PRETRAINED_GPT2_MODELS
Gpt2Helper)	OnnxModel c              	   C   s"  t  }|jdddtddt d |jddtd	d
d |jddtddd |jddtddd |jddddd |jdd |jddddd |jdd |jddddd |jdd |jdddd |jdd |jd ddd!d |jdd" |jd#ddd$d |jdd% || }|S )&Nz-mz--model_name_or_pathTz2Model path, or pretrained model name in the list: z, )requiredtypehelpz--csvFzgpt2_parity_results.csvz#path of csv file to save the result)r	   r
   defaultr   z--test_casesi  znumber of test cases per runz--runs(   znumber of repeated runs	--use_gpu
store_truezuse GPU for inference)r	   actionr   )use_gpuz--allz'run all combinations of mixed precision)allz-e--use_external_data_format)r	   r   )use_external_data_formatz	--verbose)verbosez--skip_testzEdo not run test, and only rank experiments based on existing csv file)	skip_testz--overwritezOverwrite existing csv file)	overwrite)	argparseArgumentParseradd_argumentstrjoinr   intset_defaults
parse_args)argvparserargs r#   b/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/gpt2/gpt2_parity.pyparse_arguments   sh    
r%   c                   @   s   e Zd Zdd Zdd ZdS )
ParityTaskc                 C   s"   || _ || _|| _g | _d| _d S )Nr   )
total_runs
test_casescsv_pathresultsrun_id)selfr(   r'   r)   r#   r#   r$   __init__b   s
    zParityTask.__init__c                 C   s   t j  d}| d| j }|  jd7  _z:t|d| j d| j f||| jd}|rf| j	| W n( t
k
r   td|  d }Y nX |S )Nz%Y%m%d%H%M%S_   z-tz-r)experiment_namer+   Zcsv_filenamezFailed to run experiment )datetimenowstrftimer+   r   r(   r'   r)   r*   append	Exceptionlogger	exception)r,   r    r0   
start_timer+   resultr#   r#   r$   runi   s     
zParityTask.runN)__name__
__module____qualname__r-   r:   r#   r#   r#   r$   r&   a   s   r&   c              	   C   sF   g }dd l }t| dd$}||}|D ]}|| q(W 5 Q R X |S )Nr   r   newline)csvopen
DictReaderr4   )r)   rowsr@   csvfilereaderrowr#   r#   r$   load_results_from_csv~   s    
rG   c                 C   s0   | D ]}| drt| |   S qtdd S )Nzaverage_latency(batch_size=z)Failed to get average_latency from output)
startswithfloatRuntimeError)rF   namer#   r#   r$   get_latency   s    
rL   c                 C   s8   t | }t| d }t| d }|d |d  |d  S )z@Scoring function based on 3 metrics. The larger score is better.top1_match_rateonnx_size_in_MBi  
   d   )rL   rI   )rF   Zlatency_in_msrM   rN   r#   r#   r$   score   s    rQ   c           
         s   t   t d i  |D ]}| |d < qtt|   fdddd}t| d|  td| d	 d
}d}t| D ]j\}\}}	|	|kr|}|	}|D ]H}|d |krtd||	|t	|t
|d |d |d t   qqqd S )Nz
**********r+   c                    s   | d t  | d  fS )Nr/   r   )rQ   )itemZrow_mapr#   r$   <lambda>       zprint_wins.<locals>.<lambda>T)keyreversez Wins:z	Based on z* wins and a scoring function, the ranking:r   za{:02d}: WINs={:02d}, run_id={}, latency={:5.2f}, top1_match={:.4f}, size={}_MB, experiment={}, {}rM   rN   
experiment)printdictsorteditemsr6   debuginfo	enumerateformatrL   rI   r   )
ZwinsrC   Z	test_namerF   Zsorted_winsZrankprevious_valuecountrV   valuer#   rS   r$   
print_wins   sF    

re   c                 C   s  i }i }| D ]}|d }d||< d||< qt |dddF}dddd	d
ddddddg}tj||d}|  dddg}	t| }
t|
d D ]}| | }t|d trt	|d }n|d }t|d |
dD ]}| | }d}|	D ]}|| || krd} qq|sqt|d tr,t	|d }n|d }zt
jj||ddd\}}W n tk
rn   d}d}Y nX t
jj||ddd\}}|dk	r|dk rt|d t|d kr||d   d7  < n||d   d7  < |dk r.t|d t|d kr||d   d7  < n||d   d7  < |d |d |d t|d |d |d t|d ||||d}|| qqW 5 Q R X td |  t|| d! t|| d" dS )#zRun U test and T test.r+   r   wr   r>   
model_namerun_id_1experiment_1top1_match_rate_1run_id_2experiment_2top1_match_rate_2U_statisticU_pvalueT_statisticT_pvalue)
fieldnamesr(   runsr/   Ztop1_match_rate_per_runTFz	two-sided)Zuse_continuityalternativeN)ZaxisZ	equal_varg?rM   rY   )rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   z(U-Test and T-Test results are output to zU-TestzT-Test)rA   r@   
DictWriterwriteheaderlenrange
isinstancer   jsonloadsscipystatsZmannwhitneyu
ValueErrorZ	ttest_indrI   writerowr6   r_   re   )rC   Zoutput_csv_pathZ
utest_winsZ
ttest_winsrF   r+   rD   Zcolumn_nameswriterZrequired_match_columnsZnum_resultsiZresult1ajZresult2Zall_matchedcolumnbZutest_statisticZutest_pvalueZttest_statisticZttest_pvaluer#   r#   r$   run_significance_test   s    

   



r   )raw_onnx_modelc                 C   s   t | }t|}| }|jjd j|ks0t||jjd j }|jdkrdt	
d|j  |jS t	d|j d|j  d S )Nr   ZMatMulz#Found last MatMul node for logits: z-Failed to find MatMul node for logits. Found z	 of node )onnxloadr   output_name_to_nodegraphoutputrK   AssertionErrorZop_typer6   r_   warning)r   model
onnx_modelr   noder#   r#   r$   get_last_matmul_node_name"  s    

r   c                 C   sL   | j }d| d }| jr&|d |ddd|g7 }|rH|d| |S )	N-m  -o --use_gpu -p fp16r   --io_block_listlogitsz--node_block_list--op_block_list)r   )model_name_or_pathsplitr   r4   extend)r"   last_matmul_node_nameop_block_listr   
parametersr#   r#   r$   get_mixed_precision_parameters1  s    
r   FastGeluLayerNormalization)taskc                 C   sb   t |||}dt|}|r,d| d}nd| d}t }|rR|d| d }| || d S )N,Mixed precision baseline +  in FP32z=Mixed precision baseline (logits output and last MatMul node z	 in FP32)z ())r   r   r\   r   r:   )r   r"   r   r   r   Zop_block_list_strrK   Zenv_varsr#   r#   r$   run_candidateC  s    r   c                 C   s^   | j }d| d }| jr&|d | jr6|d d| d }| jrV|d ||fS )Nr   z -o -p fp32r   r   r   )r   r   r   r4   r   )r"   r   fp32_baselinefp16_baseliner#   r#   r$   get_baselinesX  s    


r   c                    s   ddg}|  || d dg}|  || d |  || dg dd |D  d	g d
 g }|}|D ]F dg fdd|D  }|  || | d  d}	|	rd||	 qdt|dd d}
td|
 dS )z:Step 0 is to check which operator in FP16 causes most lossr   r   zFP16 except logitsz--keep_io_typeszGraph I/O FP32, Other FP16r   c                 S   s   g | ]}|qS r#   r#   .0or#   r#   r$   
<listcomp>q  s     z$run_tuning_step0.<locals>.<listcomp>z--force_fp16_initializerszFP32 except weights in FP16c                    s   g | ]}| kr|qS r#   r#   r   opr#   r$   r   x  s      zFP32 except z in FP16c                 S   s   | d S )NrM   r#   )yr#   r#   r$   rT   ~  rU   z"run_tuning_step0.<locals>.<lambda>)rV   z<step 0: optimized operator causes the most loss in precisionN)r:   r4   minrZ   )r   r   all_opsoptimized_opsZfp32_logitsZfp32_ioZoptimized_ops_resultsZop_listr   r9   Z
min_resultr#   r   r$   run_tuning_step0g  s"     r   c                 C   s.   |D ]$}d|g}|  || d| d qdS )zKStep 1 is to figure out which optimized operator in FP32 could benefit mostr   r   r   N)r:   )r   mixed_precision_baseliner   r   r   r#   r#   r$   run_tuning_step1  s    
r   c              	      s\   dddg} fdd|D } D ]6}||kr ||f}|  |d
|dd|| q d	S )zAssumed that you have run step 0 and 1 to figure out that Logits FP32 and some operators shall be in FP32,
    This step will try add one more operator.
    r   r   SkipLayerNormalizationc                    s   g | ]}| kr|qS r#   r#   )r   xr   r#   r$   r     s      z$run_tuning_step2.<locals>.<listcomp>r   z(Mixed precision baseline + {},{} in FP32r   N)r   )r:   ra   r   )r   r   r   Zcandidate_fp32_opsZfp32_opsr   r   r#   r   r$   run_tuning_step2  s    

r   c           
   
      s~  t jd|j|jg d}t|\}}| |d}g }|rVd|krV|d rV|d d}ntdg  |rd|kr|d r|d d ntd|jst	
d	 d S | |d
 t|d }t| ||g d  fdd}|jrt| | | t||g d}	t| |	| t| |	| n,t| |||dddgd t| ||dgd t| |||ddddgd t| |||dddddgd d S )NZonnx_models)Z
new_folderZremove_existingzFP32 baselineZoptimized_operatorsr   z!Failed to get optimized operators	operatorszFailed to get operatorsz5skip mixed precision since --use_gpu is not specifiedzFP16 baselineraw)r   c                    s    fdd| D S )Nc                    s   g | ]}| kr|qS r#   r#   )r   r   r   r#   r$   r     s      z4run_parity.<locals>.get_fp32_ops.<locals>.<listcomp>r#   )r   r   r#   r$   get_fp32_ops  s    z run_parity.<locals>.get_fp32_opsr   r   Addr   ZEmbedLayerNormalization)r   Zget_onnx_pathsr   r   r   r:   r   rJ   r   r6   r_   r   r   r   r   r   r   r   )
r   r"   Zonnx_model_pathsr   r   r9   r   r   r   r   r#   r   r$   
run_parity  sb    
r   __main__rP      i'  zNot enough test cases or runs to get stable results or test significance. Recommend test_cases >= 100, runs >= 20, test_cases * runs >= 10000.zOutput file zK existed. Please remove the file, or use either --skip_test or --overwrite.z6Remove existing file %s since --overwrite is specifiedzFailed to load csv z#Start running significance tests...z.csvz
.stats.csv)N):r   r@   r1   rz   loggingosr   Zscipy.statsr|   Zbenchmark_helperr   r   Zconvert_to_onnxr   Zgpt2_helperr   r   r   r   	getLoggerr6   r%   r&   rG   rL   rQ   re   r   r   r   r   r   r   r   r   r   r   r;   r"   r   r(   rs   r   pathexistsr   r   rJ   r_   remover   r)   rC   r5   r7   r*   replaceZsummary_csvr#   r#   r#   r$   <module>   sn   

C	*^
F

*

