U
    T?hÊU  ã                   @   sp   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ eeƒZG dd„ deƒZG d	d
„ d
eƒZdS )é    )Ú	getLoggerN)ÚFusion)ÚFusionUtils)Úhelper)Ú	OnnxModelc                       sH   e Zd ZdZeedœ‡ fdd„Zdd„ Zdd„ Zd	d
„ Z	dd„ Z
‡  ZS )ÚFusionGptAttentionPastBasez3Base class for GPT Attention Fusion with past state©ÚmodelÚ	num_headsc                    s6   t ƒ  |dddgd¡ || _t|ƒ| _i | _d | _d S )NÚ	AttentionÚLayerNormalizationÚSkipLayerNormalizationz	with past)ÚsuperÚ__init__r
   r   ÚutilsÚcasted_attention_maskÚmask_filter_value©Úselfr	   r
   ©Ú	__class__© ú_/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/fusion_gpt_attention.pyr      s
    
z#FusionGptAttentionPastBase.__init__c           
      C   sø   | j  |d|¡}|d ks"|jdkr0t d¡ d S | j  |d¡dkrPt d¡ d S |jd }| j  |d|¡}|r~|jdkr~|}n6| j  |ddgddg¡}|d kr¬t d¡ d S |d }| j  |d¡dkrÔt d	¡ d S |jd }	||	krôt d
¡ d S |S )Nr   ZGatherz,match_past_pattern_1: expect Gather for pasté   z9match_past_pattern_1: expect indices=1 for Gather of pastÚ	Transposez7match_past_pattern_1: failed match Transpose and Gatheréÿÿÿÿz;match_past_pattern_1: expect indices=0 for Gather k of pastz,match_past_pattern_1: expect past to be same)r	   Ú
get_parentÚop_typeÚloggerÚdebugZfind_constant_inputÚinputÚmatch_parent_path)
r   Úconcat_kÚconcat_vÚoutput_name_to_nodeZgatherÚpastÚparentZgather_past_kÚpast_k_nodesÚpast_kr   r   r   Úmatch_past_pattern_1   s0    






z/FusionGptAttentionPastBase.match_past_pattern_1c           
      C   sz  | j  |d|¡}|d ks"|jdkr0t d¡ d S | j  |d|¡}|d ksR|jdkr`t d¡ d S | j  ¡ }|dk r²t |ddg¡st d¡ d S t |d	d
d
g¡sôt d¡ d S nB| j 	|d
dg¡sÒt d¡ d S | j 	|d
d
d
g¡sôt d¡ d S tj|ddddst d¡ d S |j
d }| j  |ddgddg¡}|d krPt d¡ d S |d j
d }	||	krvt d¡ d S |S )Nr   ÚSqueezez:match_past_pattern_2: expect Squeeze as parent of concat_vÚSplitz0match_past_pattern_2: expect Split for past pathé   Zaxesz:match_past_pattern_2: axes != [0] for Squeeze in past pathÚsplitr   z<match_past_pattern_2: split != [1, 1] for Split in past pathZaxis)Údefault_valuezKmatch_past_pattern_2: attribute axis of Split are not expected in past pathz7match_past_pattern_2: failed to match past_k_nodes pathr   z,match_past_pattern_2: expect past to be same)r	   r   r   r   r   Zget_opset_versionr   Zcheck_node_attributer   Zcheck_node_input_valuer    r!   Úinfo)
r   r"   r#   r$   Zsqueezer-   Zopset_versionr%   r'   r(   r   r   r   Úmatch_past_pattern_2K   sF    












z/FusionGptAttentionPastBase.match_past_pattern_2c                 C   sZ   | j j|d|dd}|s&t d¡ d S | j j|d|dd}|sLt d¡ d S |jd }|S )NÚ	UnsqueezeF)Ú	recursivezexpect unsqueeze for presentÚConcatzexpect concat for presentr   )r	   Zfind_first_child_by_typer   r/   Úoutput)r   r#   Úinput_name_to_nodesZunsqueeze_present_vZconcat_presentÚpresentr   r   r   Úmatch_presentŽ   s(       ÿ
   ÿ

z(FusionGptAttentionPastBase.match_presentc                 C   s\   || j kr| j | }nB| j |¡r>| j |¡\}}|| j |< n| j |¡\}}|| j |< |S ©N)r   r	   Úfind_graph_inputr   Zcast_graph_input_to_int32Zcast_input_to_int32)r   Ú
input_nameÚattention_mask_input_nameZcastedZ	cast_noder   r   r   Úcast_attention_maskŸ   s    

z.FusionGptAttentionPastBase.cast_attention_mask)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Úintr   r)   r0   r7   r<   Ú__classcell__r   r   r   r   r      s   1Cr   c                       s8   e Zd ZdZeedœ‡ fdd„Zdd„ Zdd„ Z‡  Z	S )	ÚFusionGptAttentionzP
    Fuse GPT-2 Attention with past state subgraph into one Attention node.
    r   c                    s   t ƒ  ||¡ d S r8   )r   r   r   r   r   r   r   °   s    zFusionGptAttention.__init__c
                 C   s  | j  d¡}
tjd|||||g|
d |g|
d}d|_|j t d| j¡t d|	rVdnd	¡g¡ | j	d k	r†|j t d
t
| j	ƒ¡g¡ tjd|
d |jd g|
d g|
d d}tjd|
d |jd g|g|
d d}| j |||g¡ | j| j|j< | j| j|j< | j| j|j< d S )NZGptAttentionr   Ú_output)ÚinputsÚoutputsÚnamezcom.microsoftr
   Zunidirectionalr   r   r   ÚMatMulZ_matmul_outputZ_matmulÚAddé   Ú_add)r	   Zcreate_node_namer   Z	make_nodeÚdomainÚ	attributeÚextendZmake_attributer
   r   Úfloatr    Znodes_to_addZthis_graph_nameZnode_name_to_graph_namerG   )r   Ú	fc_weightÚfc_biasÚgemm_qkvr%   r6   r    r4   ÚmaskÚis_unidirectionalZattention_node_nameZattention_nodeZmatmul_nodeÚadd_noder   r   r   Úcreate_attention_node³   s>    
üþÿ
üüz(FusionGptAttention.create_attention_nodec           8      C   s  d }d }g }|j dk}d }|sR| jj|dddddddgdd dddddg||d}n.| jj|ddddddgd dddddg||d}|d krŒd S d }	|sº|\}
}}}}}}|
jd	|d   }	n|\}}}}}}| j |d
dddgd	d	ddg¡}|d krt d¡ d S |\}}}}| j |ddddgddddg|¡}|d kr\| j |ddddgddddg|¡}|d krö| j |dddgdd dg|¡}|d kr¬| j |dddgdd dg|¡}|d krÄt d¡ d S |d	 jd	 }| j |d ¡\}}|d j| }n|d	 jd	 }|d	 jd }|d }|	d k	r>|	|jkr>t d¡ d S d}d }d }d }| j |dddddgdddddg¡} | d k	rJ| \}!}"}#}$}%| j |"ddddddddddg
d	dd	dd	dddddg
¡}&|&d krÚt d¡ d S |&d }'|&d }|$|'krt d¡ d S t|&ƒd	krv|&d j dkrv| j |&d ¡\}}(|(dkrv|( | _n,| j 	|ddddgddd	dgfdddddgddd d	dgfg|¡\}} }| d kr¨t d ¡ d S | d! })| d" }$| d }%|d	kr| d	 }*| j 	|*ddd#dddgd dd	dddgfdddddgd dd	ddgfddddgd dd	dgfg|¡\}}}|d krNt d$¡ d S t|ƒd	kr|d j dkr| j |d ¡\}}(|(dkr|(| _| j 	|)d#dddddddgdddd	ddddgfdddddddgddd	ddddgfg|¡\}}&}|&d krt d%¡ d S |&|dkrdnd	 }| j 
|&d d|¡}+|+j dkrZ|+}'|$|'krvt d¡ d S n|+j d
krl|+}n
t d%¡ | j |jd ¡},t|,tjƒrÐt|,jƒd&krÐ|,jd d… d'krÐ|,jd |,jd ksÞt d(¡ d S t |,t |,¡¡rød)}n(t |,t t |,¡¡¡s t d*¡ d S | j |%dddgdddg¡}-|-d krTt d+¡ d S |-\}.}/}0||0krvt d,¡ d S | j |%d
dddgd	d	ddg¡}1|1d krì| j |%dd
dddgd	dd	ddg¡}1|1d krÜt d-¡ d S |1\}}2}3}4}5n|1\}2}3}4}5||5krt d.¡ d S |r.|2|kr.t d/¡ d S d0}6|d k	rT|d jd }7|  |7¡}6|  |2||¡pp|  |2||¡}|d krŠt d1¡ d S | j |¡s¢t d2¡ |  ||¡}|d krÆt d3¡ d S | j |¡sât d4¡ d S |  ||||||jd |jd |6|¡	 d| _d S )5Nr   rI   ZReshapeZGemmr   rH   r   )r$   Úreturn_indicer   r3   r+   z&fuse_attention: failed to match v pathr   z'fuse_attention: failed to match fc pathrJ   r   zCUpstream Add and (Skip)LayerNormalization shall have one same inputTZSoftmaxÚSubZMulÚDivÚSlicer1   r*   ZShapez8fuse_attention: failed to match unidirectional mask pathé   z-fuse_attention: skip since div_qk != div_maskiðØÿÿZWherez(fuse_attention: failed to match qk nodeséýÿÿÿéþÿÿÿZCastz9fuse_attention: failed to match input attention mask pathz)fuse_attention: failed to match mask pathé   )r   r   z4fuse_attention: skip since mask shape is not 1x1xWxWFzDfuse_attention: skip since mask is neither lower triangular nor onesz&fuse_attention: failed to match q pathz.fuse_attention: skip since split_fc != split_qz&fuse_attention: failed to match k pathz.fuse_attention: skip since split_fc != split_kz8fuse_attention: skip since concat_k != concat_k_to_matchÚ z)fuse_attention: failed to match past pathzpast is not graph input.z,fuse_attention: failed to match present pathz!expect present to be graph output)r   r	   r!   r    r   r   Zget_constant_inputÚlenr   Zmatch_parent_pathsr   Zget_constant_valueÚ
isinstanceÚnpZndarrayÚshapeZallcloseZ	ones_likeZtrilr<   r)   r0   r/   r9   r7   Zfind_graph_outputrV   r4   Zprune_graph)8r   Znormalize_noder5   r$   r%   r6   rW   Zis_normalize_node_skiplayernormZ	qkv_nodesZanother_inputZadd_qkvZreshape_qkvrR   Z	reshape_1Z	reshape_2Ztranspose_qkvZ
matmul_qkvZv_nodesr#   Ztranspose_vZ	reshape_vZsplit_fcZfc_nodesrP   ÚiÚ_rQ   Zlayernorm_before_attentionrT   Z
slice_maskZinput_mask_nodesZconcat_k_to_matchZqk_nodesZ
softmax_qkZsub_qkZmul_qkZdiv_qkZ	matmul_qkZ
mask_nodesZdiv_maskZmul_valZwhere_qkZadd_qkZdiv_or_concatZ	mask_dataZq_nodesZtranspose_qZ	reshape_qZsplit_qZk_nodesr"   Ztranspose_kZ	reshape_kZsplit_kr;   r:   r   r   r   Úfuseã   sþ   
ûû	ø	ù 



ü


ü
ü
ü


$
öò




þú



þþ

þ÷ð



þþúó






ÿþýü






 
ý






  ÿ





÷zFusionGptAttention.fuse)
r=   r>   r?   r@   r   rA   r   rV   rf   rB   r   r   r   r   rC   «   s   0rC   )Úloggingr   Únumpyrb   Zfusion_baser   Zfusion_utilsr   Zonnxr   Z
onnx_modelr   r=   r   r   rC   r   r   r   r   Ú<module>   s    