U
    T?h}p                     @   sZ   d dl Z d dlZd dlmZmZ d dlmZmZ d dl	m
Z
 e eZG dd deZdS )    N)AttentionMaskFusionAttention)TensorProtohelper)	OnnxModelc                       sD   e Zd ZdZeeeed fddZdd Zdd Z	d	d
 Z
  ZS )FusionBartAttentionz?
    Fuse Bart Attention subgraph into one Attention node.
    )modelhidden_size	num_headsattention_maskc                    s   t  |||| d S )N)super__init__)selfr   r	   r
   r   	__class__ `/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/fusion_bart_attention.pyr      s    zFusionBartAttention.__init__c                 C   s  | j |dgdg}|d kr dS |d }| j |dddgdddg}	| j |dddgdddg}
|	d ksp|
d krtdS |	\}}}|
\}}}|jd |ks|jd |krdS | j |dddgdddg}| j |dddgdddg}|d ks|d krdS |d	 j|jks|d	 j|jkrdS | j |ddd
gdddg}| j |ddd
gdddg}| j |ddd
gdddg}|d ks|d ks|d krdS |d	 }|d	 }|d	 }|jd }|jd |ks|jd |ks|jd |krdS dS )NConcat   Fr   	UnsqueezeGatherShape   MulT)r   match_parent_pathinputnameoutput)r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2
root_inputZconcat_qkv_2_pathZconcat_qkv_2Zreshape_qkv_2_path_1Zreshape_qkv_2_path_2_Zgather_1Zshape_1Zgather_2Zshape_2Zreshape_qkv_1_path_1Zreshape_qkv_1_path_2Zreshape_q_2_pathZreshape_k_2_pathZreshape_v_2_pathmul_qmul_kZmul_vZgather_1_outr   r   r   check_runtime_shape_path   s<    	

$
0z,FusionBartAttention.check_runtime_shape_pathc              
   C   s   | j |ddddgddddg}|d kr,dS |d jd |jd krHdS | j |d	d
ddddgddddddg}| j |d	d
ddddgddddddg}|d ks|d krdS |d }	|d }
|	jd |
jd krdS |d jd |jd kr|d jd |jd krdS |d k	r| j |dgdg}|d kr8dS | j |d ddddgddddg}| j |d dddgdddg}|d kr|d krdS |\}}}}|\}}}|jd |jd krdS |d jd |jd kr|d jd |jd krdS dS )Nr   Slicer   r   r   r   Fr   r   PowZCastDivr   r   T)r   r   r   r   )r   r   
matmul_qkvadd_qk	matmul_qkadd_qZreshape_qkv_2_pathZmatmul_qk_path_1Zmatmul_qk_path_2Zmul_1Zmul_2Zadd_qk_pathZslice_q_path_1Zslice_q_path_2r%   Zunsqueeze_1Zunsqueeze_2r   r   r   check_runtime_shape_path_openaiM   s\     
 
    4

 
 
 
4z3FusionBartAttention.check_runtime_shape_path_openaic           a      C   s  d}| j |ddddddgddddddg}| j |dddddgdddddg}|d k	rj|\}}}	}
}}n$|d k	r|}|\}}}	}
}d}nd S g }|jD ],}||krq||d jd krq|| qt|dkrd S |d }|| }|jdkr| j |d }|jD ]@}|sq|| }d	d
 |D }|ddkr|} qJqt	dd
 | j 
 jD }t	dd
 | j 
 jD }| j |dddddgddddd g}| j |ddddgdddd g}| j |ddddddgdddddd g}| j |dgdg}| j |ddddgddddg}d\}}d\}}|d k	rT|\}}} }}!|jd }n|d k	r"|}|\}} }}!| j j|ddg| gd}"| j |dddg}#|"d k	r|"\}$}%|%jd |kr|%jd }|#d k	rr|#\}&}$}'|'jd |kr|'jd }| j |&ddgddg}(|(\}$})|)jd }nP|d k	rX|\}}&}} }}!|}|&jd }|&jd }n|d k	r|d jd |kr|}|d jd }|d jd }||krrttdd | j  | }*t|*dkr|*d jd nd}n|d k	rd|d jd |krd|}|d jd }|d jd }||krrttdd | j  | }*t|*dkr^|*d jd nd}ntd d S ||kr|nd}||kr|nd}| j |ddgddg}+| j |dddddgdddddg},| j |dddgdddg}-d }.|+d k	r
|+\}$}/|+}0n<|,d k	r(|,\}$}$}.}$}/|,}0n|-d k	rB|-\}$}.}/|-}0nd S | j |/ddddddgddddddg}1| j |/dddddgdddddg}2d }3|1d k	r|1\}3}4}5}6}7}8n"|2d k	r|2}1|1\}6}4}5}7}8nd S | j |/ddddddgddddddg}9| j |/ddddgddddg}:| j |/dddddgdddddg};| j |/ddddddgddddddg}<| j |/ddgddg}=| j |/dddddgdddddg}>d\}?}@d\}A}B}C|9d k	r|9\}$}A}D}B}E}C|9}FnH|:d k	r|:\}G}D}B}C|:}F|Cjd }@| j j|Cddg|Bgd}"| j |Cdddg}#|"d k	rT|"\}$}H|Hjd |krT|Hjd }@|#d k		r |#\}I}$}J|Jjd |kr|Jjd }@| j |Iddgddg}(|(\}$}K|Kjd }?np|;d k	r|;\}$}A}D}B}C|;}F|Djd }@nF|<d k	r|<\}$}A}I}$}B}C|<}F|Ijd }?|Ijd }@n|=d k	r|=d jd |kr|=}F|Fd jd }?|Fd jd }@|@|k	r ttdd | j  |? }Lt|Ldkr|Ld jd nd}@n|>d k		r|>d jd |k	r|>}F|Fd jd }?|Fd jd }@|@|k	r ttdd | j  |? }Lt|Ldk	r|Ld jd nd}@nd S |?|k	r.|?nd}?|@|k	r@|@nd}@|F|:|;|<fk	r| j |jd jd }Md}N| j |N}O|Od k	r| j|Ntj|Mgtjdg|M tjdd | j d}Ptd|N|Cjd g|Bjg|P}E|	r|?	s| |	||.|/|7	sd S |
s"|?
s"| |	||3|A||
s"d S |?
o@|
o@|Cd k
o@d t k}Q|Q 
ov|Cjd |k
ov|8jd |k
ov|!jd |k}R|Q 
o|8jd |k
o|Cjd |!jd k
o|Cjd |8jd k}S|R
o|0|+k}T|R
o|0|,|-fk}U|
s|Tn|U
o|?
o|}V|So |0|+k}W|Qo|0|+k}Xd }Y|Ur| j |.d!gdg}Z| j |.d"d#d#d!gddddg}[|[d k	rh|[d jd }Yn|Zd k	r|Zd jd }Y|Ts|Us|Vs|Ws|Xr|	}\|  |5\}]}^|]dks|^dks|^|] dkrtd$ d S d }_|Vs|Ws|Xr| j!r~| j"|8|Ws|Vr|Cn|?|Ws"|Vr&|!n||7|Ws6|Vr:|End |WsH|VrL|nd |]|^|\jd |Vrd|?nd|Vrp|nd|@||Vd%nd }_nL| j!}`d| _!| j#d |8|C|!|7|E||]|^||\jd |Ur|Ynd |?||@|d&}_|`| _!|_d krd S | j$|_ | j%| j&|_j< | j'(|\|
|g | j'(|0 |Vs(|Ws(|Xr|1d jdkr@|1)  |Fd jdkrX|F)  |d jdkrp|)  | j*r|Ws|Xr|1d jdkr|1)  |Fd jdkr|F)  |d jdkr|)  | j'(|1 | j'(|F | j'(| d| _+d S )'NFAddZMatMulZReshapeZ	Transposer   r   Tc                 S   s   g | ]
}|j qS r   op_type).0childr   r   r   
<listcomp>   s     z,FusionBartAttention.fuse.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r   r4   noder   r   r   r6      s     c                 S   s   g | ]
}|j qS r   r7   r8   r   r   r   r6      s     r   ) r:   )NN)excluder   c                 S   s
   | j dkS NZIdentityr2   r9   r   r   r   <lambda>       z*FusionBartAttention.fuse.<locals>.<lambda>r:   c                 S   s
   | j dkS r<   r2   r=   r   r   r   r>   ,  r?   z&fuse_attention: failed to match v pathZSoftmaxr   )NNNc                 S   s
   | j dkS r<   r2   r=   r   r   r   r>     r?   c                 S   s
   | j dkS r<   r2   r=   r   r   r   r>     r?   Z
empty_biasg        )Zdtype)dimsvalsmatmul_vZWhereZExpandr   z9fuse_attention: failed to detect num_heads or hidden_size)past_kpast_v	present_k	present_vZ
packed_qkv)Z
add_qk_strrC   rD   rE   rF   ),r   r   r   r   appendlenr3   Zget_childrencountsetgraphZmatch_child_pathlistfilterinput_name_to_nodesloggerdebugZget_initializerr@   Zadd_initializerr   FLOATnparrayZfloat32Zcreate_node_namer   Z	make_noder   r0   r(   localsZget_num_heads_and_hidden_sizeZuse_multi_head_attentionZcreate_multihead_attention_nodeZcreate_attention_nodeZnodes_to_addZthis_graph_nameZnode_name_to_graph_nameZnodes_to_removeextendpopZ!disable_multi_head_attention_biasZprune_graph)ar   Znormalize_noderN   Zoutput_name_to_nodeZmodel_impl_openaiZ	qkv_nodesZqkv_nodes_openaiZadd_outZ
matmul_outr   Ztranspose_qkvr    r,   Zother_inputsr   r$   Zskip_layernormr   childrenZchildren_typesZgraph_input_namesZgraph_output_namesZv_nodesZv_nodes_openaiZv_nodes_with_past_self_attnZv_nodes_with_past_cross_attnZ#v_nodes_with_past_cross_attn_openairD   rF   r#   Zadd_vZtranspose_vZreshape_v_1rB   Zreshape_pathZconcat_pathr%   Ztranspose_add_vZconcat_vZtranspose_concat_vZconcat_nodesZtranspose_concat_v_inZidentity_node_vZ
qk_nodes_1Z
qk_nodes_2Zqk_nodes_2_openair-   r.   Zqk_nodesZq_nodesZq_nodes_openair!   Ztranspose_qZreshape_q_1r&   r/   Zmatmul_qZk_nodes_with_biasZk_nodes_with_bias_openaiZk_nodes_no_biasZ#k_nodes_no_bias_with_past_self_attnZ$k_nodes_no_bias_with_past_cross_attnZ+k_nodes_no_bias_with_past_cross_attn_openairC   rE   r"   Zreshape_k_1Zmatmul_kZtranspose_k_1Zadd_kZk_nodesr'   Ztranspose_matmul_kZconcat_kZtranspose_concat_kZtranspose_concat_k_inZidentity_node_kZbias_dimZempty_bias_nameZempty_tensorZadd_nameZthree_root_inputsZone_root_inputZtwo_root_inputsZencoder_attentionZdecoder_attentionZdecoder_attention_with_pastZdecoder_cross_attentionZ!decoder_cross_attention_with_pastZ
mask_indexZmask_nodes_bartZmask_nodes_whisperZattention_last_noder
   r	   new_nodeZ%use_multi_head_attention_ground_truthr   r   r   fuse   sV   








	







"
"
  




















"
"
 



"

zFusionBartAttention.fuse)__name__
__module____qualname____doc__r   intr   r   r(   r0   rY   __classcell__r   r   r   r   r      s   	05r   )loggingnumpyrR   Zfusion_attentionr   r   Zonnxr   r   Z
onnx_modelr   	getLoggerrZ   rO   r   r   r   r   r   <module>   s   
