U
    T?h?L                     @   s~   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ e eZG dd dZG dd dZdS )    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   @   s^   e Zd ZdddZdd Zeddd	Zed
ddZdddZ	dddZ
dd Zdd ZdS )
Gpt2MetricTorch   c                 C   sj   |dkr|dkst || _|| _| d| | _|| _d| _d| _d| _d| _d| _	d | _
d | _i | _d S )N   d   z vs r   )AssertionErrorbaseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer    r   b/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__   s    zGpt2Metric.__init__c                 C   s  | j | jkrtd td| j d| j  d | jdkrd| j | j }d| j | j }td| j d| j d	|d
d| j d| j d	|d
d td td| jd td| jd ntd| j d | j	rtd d}d}t
| j	 D ]}t| j	| d }|dkr4td| d|d
d n.tdd|  dd|d  d  d|d
d ||t| j	|  7 }|t| j	| 7 }qtd|| d
d d S )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r   z]:	zAverage Latency: )r   r   printr   r   r   r   r   r   r   sortedkeys
statisticsZmeanlen)r   Ztop_1_error_rateZtop_k_error_ratetotalcountkeyZaverager   r   r   r    %   s2    
4
.zGpt2Metric.print)is_empty_pastc                 C   s6   ||    }|r$t| j|| _nt| j|| _|S N)absmaxr   r   )r   Zbaseline_logitsZtreatment_logitsr(   diffr   r   r   diff_logitsC   s
    zGpt2Metric.diff_logits)
batch_sizec                 C   s>   |  j |7  _ tj|dftjd| _tj|dftjd| _d S )Nr   Zdtype)r   torchzerosboolr   r   )r   r.   r   r   r   start_batchL   s    zGpt2Metric.start_batchTc                 C   s^   |  |j|jd| |  |j|j| j| | |j|j|dk}|rZtd| j d|  d S )Nr   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r-   logitsr    r   )r   r   r   past_seq_lenverbosemax_diffr   r   r   
eval_batchQ   s
    zGpt2Metric.eval_batchc                 C   s   t t ||s|dkrJ|r.td| j  |  jt || O  _nH|rftd| d| j d |  jt || dj	dddkO  _d S )Nr   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)dimr   )
r0   alleqr    r   r   Zlogical_notr   sum	unsqueeze)r   Zbaseline_topkZtreatment_topkr   r9   r   r   r   r4   Y   s     zGpt2Metric._eval_topkc                 C   s,   |  j | j 7  _ |  j| j 7  _d S r)   )r   r   r?   r   r   r   r   r   r   	end_batchh   s    zGpt2Metric.end_batchc                 C   sF   |dkrt t|d nd}|| jkr2g | j|< | j| | d S )Nr   r   )intmathlog2r   append)r   r8   Zlatencyr'   r   r   r   add_latencyl   s    

zGpt2Metric.add_latencyN)r   r   )T)T)__name__
__module____qualname__r   r    r2   r-   rC   r3   r;   r4   rB   rG   r   r   r   r   r      s   
	

r   c                
   @   s   e Zd ZdddZedddZdd	 Zd
d Zdd Ze	dddZ
e	dd Ze	dd Ze	ejddddddddf	ddZdS )
Gpt2TesterFr   c                 C   s   |j d | _|j d | _|| _|| _|| _|| _|d k	| _|d k	| _g | _	d| j|d|| g}t
|D ]2}t||r~tjntj}| j	|| qfd | _d | _d | _|	| _|
| _d S )Nr   r   r   )shaper.   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger0   emptytypefloat16Zfloat32rF   tor7   r5   r6   r   top_k_required_order)r   rO   rP   rQ   Znum_attention_headsZhidden_sizeZ	num_layerdeviceZis_fp16r   rZ   Z
past_shapeZ_iZ
empty_pastr   r   r   r   t   s.    

zGpt2Tester.__init__)returnc                 C   s   t | j| j| j| jS r)   )r   rO   rP   rQ   rT   rA   r   r   r   
get_inputs   s    zGpt2Tester.get_inputsc              
      s  ddl m  tj|dt| }tj|rBtd| d d S tj|dd  fdd	}g }||| j	d
 | j
r||| jd | jr||| jd t| jD ]}||| j| dt|  qt|D ]>\}}	ttj|d| dd}
|
|	  W 5 Q R X qdd | D }t|D ]t\}} t|| tjrH|| n||    }	ttj|d| dd}
|
|	  W 5 Q R X q"td|  d S )Nr   numpy_helperZtest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                    s"   |   |   | d S r)   )rF   
from_arrayclonecpunumpy)input_tensorsZtorch_tensorr   r^   r   r   
add_tensor   s    z-Gpt2Tester.save_test_data.<locals>.add_tensorrO   rP   rQ   Zpast_Zinput_z.pbwbc                 S   s   g | ]
}|j qS r   )r   ).0outputr   r   r   
<listcomp>   s     z-Gpt2Tester.save_test_data.<locals>.<listcomp>Zoutput_zTest data saved to directory )onnxr_   ospathjoinstrexistsr    makedirsrO   rR   rP   rS   rQ   rU   rN   rT   	enumerateopenwriteZSerializeToStringget_outputsra   
isinstancerd   ndarrayrb   rc   )r   sessionri   save_test_data_dirZtest_case_idrm   rf   re   itensorfZoutput_names_namer   r^   r   save_test_data   s4    ,zGpt2Tester.save_test_datac                 C   sl  t |d tjrt|d n|d    | _t	
| j| _t	
| j| j| j| _| j  | jdg|| _| jrt| j| d gd| jd|| _| jrt| jt| jdg| jgd|| _g | _t |d trt |d | _n`t!| j"D ]T}t ||d  tjr>t||d  n||d    }| j#|| qdS )z7
        Update the inputs for next inference.
        r   r   N)$rv   rd   rw   r0   
from_numpyrb   detachrc   r7   rK   predict_next_tokenr5   r   rZ   r6   Zreshaper.   rY   rO   rR   r{   rM   r@   repeatrP   rS   catrQ   ZonesZtype_asrT   tuplelistrU   rN   rF   )r   ri   stepr[   rz   past_ir   r   r   update   s4    0"*zGpt2Tester.updatec                 C   s2  t d | jdk	r<| j|j   }|dkr<t d|  t| j|jks^t d| j|j | jrt| j|jkst d| j|j | j	rt| j
|j
kst d| j
|j
 t| jt|jkstt| jD ]\\}}|j|j| jkst| dkr||j|    }|dkrt d	| d
|  qdS )z3
        Compare inputs and logits output.
        zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r    r7   r*   r+   r0   r=   rO   rR   rP   rS   rQ   r$   rT   r
   rr   rL   Znelement)r   r   Zmax_io_diffrz   r   Zmax_past_diffr   r   r   r,      s8    
zGpt2Tester.diffr   c                 C   sf   | dddf }|dkr*t |dd}|S t j|dddddd|f }|s^| \}}|S |S dS )z4
        Get top k topkens based on logits.
        Nr   T)Z
descending)r0   ZargmaxZargsortsort)r7   r   Zrequired_orderZlastTokenLogitsZgeneratedTokensZtopkZsorted_topk_r   r   r   r     s     zGpt2Tester.predict_next_tokenc                 C   s   g }t |D ]}t| |d  tjr6t| |d  n
| |d  }t||d  tjrht||d  n
||d  }||   }|| qt	d|  dS )zO
        Compare the present outputs of two outputs from ONNX Runtime.
        r   zpresent_diff_max=N)
rU   rv   rd   rw   r0   r   r*   r+   rF   r    )onnx_outputonnx_io_outputrN   Zpresent_diff_maxrz   Zonnx_present_iZonnx_io_present_ir:   r   r   r   diff_present+  s    

zGpt2Tester.diff_presentc                 C   s*   ddl m} || }ddlm} |j|kS )z>
        Returns True if the ONNX model is quantized.
        r   )load)__producer__)rk   r   Z!onnxruntime.quantization.quantizer   Zproducer_name)Zonnx_model_pathr   modelZquantize_producerr   r   r   is_quantized_onnx_model@  s    z"Gpt2Tester.is_quantized_onnx_modelZGpt2LMHeadModelT   r   .c           -      C   s  t d| d| d| dt| d|	 d |jj}|jj}|jj}|jj}d}|tjk}|rtd| 	 d j
kstt| | tjd	d
d|j|d}tj|||d}d}|tjkrdnd}t|||}t|||}t|d ||}t|D ]\}}|	dkr||	kr q|d dkr*t |  |d }|dd}|dd}t|||||||||| 
}t|||||||||| 
} t|||||||d|| 
}!|!j}"||" ||" t  tj|"tjd}#t|D ]}$t|j d }%t|j d  d }&t!" }'t#||!$ }(|%|&t!" |'  |!&|(|$| tj'| |$ dd\})}*|%|&|*d  |&|)|$| tj|"|&|%|j|d}+t(||+ tj)| | $ ||+dddd\},}*|%|&|*d  ||k r| *| |,|| |d7 }| &|,|$| |
rF|+|  t,|)|,| t d  t d!|!j- t d"|j- t d#| j- |j.|!||&|
d$ |j.|!| |&|
d$ |#|!j-|k/ B }#t0|#r qqW 5 Q R X |1  |1  q|   |   |   dS )%z
        Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
        It will print top 1 and top k errors on the given test inputs.
        zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   rX             )r.   Zpast_sequence_lengthZsequence_lengthconfigmodel_class)
is_float16r   zQuantized OnnxZOnnxz with IO Binding
   rO   rP   NrQ   Fr/   r      )
total_runsr   )r   T)r   Zreturn_numpyZinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)r9   )2r    r$   r   rN   n_headn_embdeos_token_idr   ZFLOAT16ru   rW   r
   evalrY   r   Zget_output_shapesZget_output_buffersZINT8r   rr   getrK   r.   r3   r0   Zno_gradr1   r2   rU   r   rO   sizerT   timeitZdefault_timerZpytorch_inferencer]   rG   r   Zonnxruntime_inferenceZauto_increase_buffer_sizeZ$onnxruntime_inference_with_binded_ior~   r,   r   r5   r;   anyr=   rB   )-rx   r   r[   Ztest_inputs	precisionr   r   Ztop_k_no_orderZ	max_stepsZ
max_inputsr9   r~   ry   rN   r   r   r   Ztest_data_savedr   Zinit_output_shapesZoutput_buffersr   r   Ztorch_metricZonnx_metricZonnx_io_metricrz   inputsrO   rP   rQ   Zonnx_runnerZonnx_io_runnerZtorch_runnerr.   doner   Zseq_lenr8   
start_timeZpytorch_outputr   Zavg_latency_msZoutput_shapesr   r   r   r   test_generationL  s   &



  




zGpt2Tester.test_generationN)Fr   F)r   F)rH   rI   rJ   r   r   r]   r~   r   r,   staticmethodr   r   r   r   ZFLOAT32r   r   r   r   r   rK   s   s2   
   
+'('

rK   )loggingrD   rl   r#   r   rd   r0   Zbenchmark_helperr   Zgpt2_helperr   r   	getLoggerrH   loggerr   rK   r   r   r   r   <module>   s   
^