U
    T?h                      @   s   d dl Z d dlmZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ e eZG dd	 d	eZG d
d deZdS )    N)Union)AttentionMaskFusionAttention)NumpyHelper)	NodeProtohelper)	OnnxModel)BertOnnxModelc                       sZ   e Zd ZdZeeeed fddZee	e	eeeeee
e	df d	ddZd	d
 Z  ZS )FusionTnlrAttentionz
    Fuse TNLR Attention subgraph into one Attention node.
    TNLR Attention has extra addition after qk nodes and adopts [S, B, NH] as I/O shape.
    )modelhidden_size	num_headsattention_maskc                    s   t  |||| d S N)super__init__)selfr   r   r   r   	__class__ Z/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/onnx_model_tnlr.pyr      s    zFusionTnlrAttention.__init__N)	
mask_indexmatmuladdr   r   inputoutput
add_qk_strreturnc	                 C   s  |dkst |dkr:|| dkr:td| d|  d S | j|jd }	| j|jd pn| j|jd }
|	d ks|
d krd S t|	}t|
}| jd}|	j	}t
|}t
j|d ||d| g|| dd	}	| j|	| j t
j|d
 |d| g|| dd	}
| j|
| j ||d |d
 g}|d k	rN|| n
|d |d k	rv|d || t
jd||g|d}d|_|jt
d|g |S )Nr   zinput hidden size z# is not a multiple of num of heads    Z	AttentionZ_qkv_weight   T)name	data_typeZdimsvalsrawZ	_qkv_bias )inputsoutputsr    zcom.microsoftr   )AssertionErrorloggerdebugr   Zget_initializerr   r   Zto_arrayZcreate_node_namer!   r   Ztensor_dtype_to_np_dtypeZmake_tensorZastypetobytesZadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendZmake_attribute)r   r   r   r   r   r   r   r   r   weightZbiasZ
qkv_weightZqkv_biasZattention_node_nameZtensor_dtypeZnp_typeZattention_inputsZattention_noder   r   r   create_attention_node    s`    $








z)FusionTnlrAttention.create_attention_nodec                 C   s  |}|j dkrd S | j|ddddddgddddddg}|d k	rT|\}}}}}	}
nd S g }t|jD ]0\}}||krxqf||d jd krqf|| qft|dkrd S |d }| j|
ddd	ddgdddddg}|d krd S |\}}}}}| j|dgdg}|d }| j|
d
ddgdddg}|d kr4d S |\}}}| j|dddd	ddgddddddg}|d krtd S |d }|d }| j|ddd	ddgdddddg}|d krd S |d }|d }| j|ddgddg}|d krd S |jd |kr d }|}| |||| j	| j
||jd |d jd }|d kr>d S | j| | j| j|j< tjdd|j g|jd gd|j dddgd}| j|| j |jd |jd< d|j |jd< | j||	|
g | j| | j| | j| | j| d| _d S )NZSkipLayerNormalizationZWhereAddZMatMulZReshapeZ	Transposer   r   SliceZSoftmaxZMulZback_transpose_in_Zback_transpose_   )permT)Zop_typer   Zmatch_parent_path	enumerater   r   r,   lenr2   r   r   Znodes_to_addr+   Znode_name_to_graph_namer    r   r-   add_nodeZnodes_to_remover0   Zprune_graph)r   Znormalize_nodeZinput_name_to_nodesZoutput_name_to_nodeZ
start_nodeZ	qkv_nodes_Zmatmul_belowZreshape_qkvZtranspose_qkvZ
matmul_qkvZother_inputsZ_ir   Z
root_inputZv_nodesr   r   Zupper_nodesZ	transposeZqk_nodesZadd_qkZ	matmul_qkZq_nodesZk_nodesZrelative_position_bias_nodesr   Zattention_last_nodenew_nodeZback_transposer   r   r   fuseh   s    









zFusionTnlrAttention.fuse)__name__
__module____qualname____doc__r   intr   r   strr   r   r2   r>   __classcell__r   r   r   r   r
      s"   
Hr
   c                       s$   e Zd Z fddZdd Z  ZS )TnlrOnnxModelc                    s4   t  ||| t| | _t| | j| j| j| _d S r   )r   r   r   r   r
   r   r   attention_fusion)r   r   r   r   r   r   r   r      s    
zTnlrOnnxModel.__init__c                 C   s   | j   d S r   )rG   apply)r   r   r   r   fuse_attention   s    zTnlrOnnxModel.fuse_attention)r?   r@   rA   r   rI   rE   r   r   r   r   rF      s   rF   )loggingtypingr   Zfusion_attentionr   r   Zfusion_utilsr   Zonnxr   r   Z
onnx_modelr   Zonnx_model_bertr	   	getLoggerr?   r(   r
   rF   r   r   r   r   <module>   s   
 L