U
    T?h3                     @   s\   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 ee
Zdd ZG dd	 d	eZdS )
    )	getLoggerN)FusionGptAttentionPastBase)helper)	OnnxModelc                 C   s   t | | dkS )Ngư>)abs)valueexpected_value r	   h/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/fusion_gpt_attention_megatron.pyis_close   s    r   c                       s@   e Zd ZdZeed fddZdd Zdd Zd	d
 Z	  Z
S )FusionGptAttentionMegatronz^
    Fuse GPT-2 Attention with past state subgraph from Megatron into one Attention node.
    )model	num_headsc                    s   t  || d S )N)super__init__)selfr   r   	__class__r	   r
   r      s    z#FusionGptAttentionMegatron.__init__c                 C   s   | j d}| |}	|jd }
|jd |jd kr8dnd}tjd||jd |j| |	|g|
|g|d}d|_|j	t
d| jt
ddg | jd k	r|j	t
d	t| jg |g}| j	| |D ]}| j| j|j< q| j| d
| _d S )NZGptAttentionr      Z	Attention)inputsoutputsnamezcom.microsoftr   Zunidirectionalmask_filter_valueT)r   Zcreate_node_nameZcast_attention_maskoutputinputr   Z	make_nodedomain	attributeextendZmake_attributer   r   floatnodes_to_addZthis_graph_nameZnode_name_to_graph_namer   Znodes_to_removeappendZprune_graph)r   matmul_before_splitadd_before_splitpastpresentr   reshape_qkvmaskZattention_node_nameZ
int32_maskr   iZattention_noder   noder	   r	   r
   fuse_attention_node   s:    




z.FusionGptAttentionMegatron.fuse_attention_nodec              	   C   s   | j |ddddgddddg}|d kr6td d S |\}}}}	t|dkr|d jdkr| j |d \}
}|dkr| | _|jd |j	d krtd d S | j
|dd	std
 d S | j
|ddstd d S | j |	jd std d S | j
|ddgs td d S | j
|ddgsBtd dS | j
|ddgsdtd dS | j
|	ddgstd d S | j
|	ddgstd d S | j |ddddgddddg}|d ks|d |krtd d S | j |	ddddgddddg}|d ks&|d |kr4td d S | j |	dddddgdddddg}|d ksp|d |kr~td d S | j |	dddddgdddddg}|d kr| j |	ddddd gdddddg}|d ks|d |krtd! d S |	jd S )"NMulSubSlicer   r   z8fuse_attention: failed to match unidirectional mask pathi'  zCfuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]g     @z?fuse_attention failed: mul_mask input 1 is not constant 10000.0g      ?z;fuse_attention failed: sub_mask input 0 is not constant 1.0z+expect slick_mask input 0 to be graph inputzKfuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]   zIfuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]F   zJfuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]   zDfuse_attention failed: slice_mask input 3 (axes) is not constant [2]zEfuse_attention failed: slice_mask input 4 (steps) is not constant [1]Z	UnsqueezeZGatherZShapeMatMulz/fuse_attention: failed to match last slice pathz0fuse_attention: failed to match first slice pathz3fuse_attention: failed to match last slice sub pathLayerNormalizationSkipLayerNormalizationz5fuse_attention: failed to match last slice sub path 1)r   match_parent_pathloggerdebuglenop_typeget_constant_inputr   r   r   utilsZcheck_node_input_valuefind_graph_inputinfo)r   sub_qkmul_qk	matmul_qklayernorm_before_attentionZ
mask_nodesZmul_maskZsub_maskZlast_slice_maskZ
slice_mask_Zmul_valZlast_slice_pathZfirst_slice_pathZfirst_slice_subZfirst_slice_sub_1r	   r	   r
   
match_maskJ   s     









 
 

 
 




z%FusionGptAttentionMegatron.match_maskc           .      C   s  d }d }|j dk}d }|sH| jj|ddddddgddd dddg|d}n(| jj|dddddgdd dddg|d}|d kr|d S d }|s|\}	}
}}}}|	jd }n|\}
}}}}|jd }| j|d	ddd
dddgdddddd dg}|d kr| j|d	ddd
dddgdddddd dg}|d kr2td d S |\}}}}}}}|j dkrn||jd krntd d S |j dkr||jd krtd d S | j|ddddgddddg}|d krtd d S |\}}}}| j|ddkrtd d S | ||||}| j|dddd
gddddg}|d krFtd d S |\}}} }!||!krjtd d S | j|ddd	ddd
gddddddg}"|"d krtd d S |"\}#}$}%}&}'}(||(krtd d S | j	|'\})}*t
|*tjr:t|*jdgkr:|*d dkr:|*d dkr:|*d dkr:|*d dksHtd d S |*d }+|+| jkrztd|+ d| j  |+| _|*d },| j	|#\})}*ttt|,}-t|*|-std|* d |-  d S | j	|\})}*t|*|-std!|* d |-  d S | |%||}|d kr(td" d S | j|s@td# | ||}|d krdtd$ d S | j|std% d S | |||||jd || d S )&Nr3   Addr0   ZReshapeZ	Transposer   r   )output_name_to_nodeZConcatZSplitr2   z&fuse_attention: failed to match v pathzAfuse_attention: skip_input != layernorm_before_attention.input[0]r-   ZSoftmaxr+   r*   z'fuse_attention: failed to match qk pathZaxisz+fuse_attention failed: softmax_qk axis != 3Divz&fuse_attention: failed to match q pathz-fuse_attention: skip since split_v != split_qz&fuse_attention: failed to match k pathz-fuse_attention: skip since split_v != split_kr.   r/   z:fuse_attention: reshape constant input is not [0, 0, N, H]zDetected num_heads=z. Ignore user specified value zfuse_attention: div_k value=z
 expected=zfuse_attention: div_q value=z!fuse_attention: match past failedz(fuse_attention: past is not graph input.z$fuse_attention: match present failedz1fuse_attention: expect present to be graph output)r8   r   r4   r   r5   r6   r   Zget_node_attributerB   r9   
isinstancenpZndarraylistshaper   r<   r   sqrtr   Zmatch_past_pattern_2r;   Zmatch_presentZfind_graph_outputr)   ).r   Znormalize_nodeZinput_name_to_nodesrD   r#   r$   Zis_normalize_node_skiplayernormZ	qkv_nodesZ
skip_inputZadd_skipZadd_after_attentionZmatmul_after_attentionr%   Ztranspose_qkvZ
matmul_qkvZv_nodesZconcat_vZtranspose_vZ	reshape_vZsplit_vr"   r!   r@   Zqk_nodesZ
softmax_qkr=   r>   r?   Zattention_maskZq_nodesZdiv_qZtranspose_qZ	reshape_qZsplit_qZk_nodesZdiv_krA   Zconcat_kZtranspose_kZ	reshape_kZsplit_kr'   r   r   Zhidden_size_per_headr   r	   r	   r
   fuse   sN   

	
	

	

 


 



















zFusionGptAttentionMegatron.fuse)__name__
__module____qualname____doc__r   intr   r)   rB   rK   __classcell__r	   r	   r   r
   r      s
   /Zr   )loggingr   numpyrG   Zfusion_gpt_attentionr   Zonnxr   Z
onnx_modelr   rL   r5   r   r   r	   r	   r	   r
   <module>   s   