U
    T?hm                     @   s   d dl Z d dlmZmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ e eZG d
d deZG dd de
ZG dd deZdS )    N)OptionalUnion)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                       s   e Zd ZdZeeeed fddZdee	e	e	eeeeee
e ee	df dddZeeeeeeeeeeeeee	df d	d
dZdd Zdd Zdd Z  ZS )FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    )modelhidden_size	num_headsattention_maskc                    s&   t  j||||dddgd d| _d S )NF SkipSimplifiedLayerNormalizationAdd)Zuse_multi_head_attentionZsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__ X/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/onnx_model_t5.pyr      s    zFusionT5Attention.__init__N)
mask_indexq_matmulk_matmulv_matmulr   r   inputoutput
add_qk_strscalereturnc                 C   sX  |dkst |dkr:|| dkr:td| d|  dS | j|jd }| j|jd }| j|jd }|dkrt|jd  d dS t|}t|}t|}|j	|j	kst |j	d }|j	d }|j	d }||  kr|ksn t |dkr"||kr"t
d| d| d	 t|j	dd }tj|||fdd
}d| }| jd}tj|d tj||g| dd}| j|| j ||d dg}|dk	r|| n
|d |	dk	r|d ||	 tjd||g|d}d|_|jtd|g |
dk	r,|jtd|
g | jdk	rT|jtdt| jg |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)Zaxis   Z	AttentionZ_qkv_weightTnameZ	data_typeZdimsvalsraw inputsoutputsr,   com.microsoftr   r&   mask_filter_value)AssertionErrorloggerdebugr   get_initializerr#   printr	   to_arrayshapewarningnpprodstackcreate_node_namer   make_tensorr   FLOATtobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attributer4   float)r   r   r    r!   r"   r   r   r#   r$   r%   r&   Zq_weightZk_weightZv_weightZqwkwZvwZ
qw_in_sizeZ
kw_in_sizeZ
vw_in_sizeZqw_out_sizeZ
qkv_weightZqkv_weight_dimattention_node_nameweightattention_inputsattention_noder   r   r   create_attention_node*   sv    











z'FusionT5Attention.create_attention_node)querykeyvaluer   res_pos_biaspast_key
past_valuer$   present_keypresent_valuer   r   r'   c                 C   sv  |dkst |dkr:|| dkr:td| d|  d S | jd}||d krTdn||d krbdn|dg}|d k	r~|| n
|d |d k	r|| n
|d |d k	r|d k	st || || |g}|	d k	r|
d k	st ||	 ||
 tjd|||d}d|_|j	
td|g |j	
td	d
g | jd k	rh|j	
tdt| jg | d |S )Nr   r(   r)   ZMultiHeadAttentionr/   r0   r3   r   r&         ?r4   )r5   r6   r7   r   r@   rF   r   rG   rH   rI   rJ   rK   r4   rL   Zincrease_counter)r   rS   rT   rU   r   rV   rW   rX   r$   rY   rZ   r   r   rN   rP   Zattention_outputsrQ   r   r   r   create_mha_node   sL    






z!FusionT5Attention.create_mha_nodec                 C   s    |  ||| | ||| d S N)fuse_t5_encoderfuse_t5_decoder)r   normalize_nodeinput_name_to_nodesoutput_name_to_noder   r   r   fuse   s    zFusionT5Attention.fusec           "      C   s   |j dkr|j dkrd S | j|ddddgddddg}|d krDd S |\}}}}| j|dd	d
dgddddg}	|	d kr|d S |	d }
| j|dddgdddg}|d krd S |\}}}| j|dddgdddg}|d krd S |\}}}d }| j|ddddd	d	gddddddg}|d kr"d S |d }|d j dkr>d S | j|\}}|dkr^|| _| j|d jd }d }| j|ddgddg}|d krd S |d }|jd }| j|dddgdddg}|d krd S |\}}}| j|dddgdddg}|d krd S |\}}}|jd |
jd kr2d S | |\}} | 	|||||| |
jd |j
d |d
}!|!d krvd S | j|! | j| j|!j< | j|dd   | j| | j|d d  |d k	r| j|d d  | j|d d  d| _d S )Nr   r   MatMulReshape	Transposer   r   Concat	UnsqueezeGatherShapeSoftmaxMulSubCastRelativePositionBiasr[   T)op_typer   match_parent_pathget_constant_inputr4   r   process_maskr#   get_num_heads_and_hidden_sizerR   r$   nodes_to_addrF   rE   node_name_to_graph_namer,   nodes_to_removerJ   prune_graph)"r   r`   ra   rb   	qkv_nodes_reshape_qkvtranspose_qkv
matmul_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qkr   
mask_nodesmul_nodemul_valrV   	rpb_nodesZrpb_add_nodek_nodes	reshape_kZmatmul_kq_nodestranspose_q	reshape_qmatmul_qq_num_headsq_hidden_sizenew_noder   r   r   r^      s    















z!FusionT5Attention.fuse_t5_encoderc           /      C   s  |j dkr|j dkrd S | j|ddddgddddg}|d krDd S |\}}}}| j|dd	d
dgddddg}	|	d kr|d S |	d }
d }d }d }| j|ddddgddddg}|d kr`| j|dddgdddg}|d k	r2|\}}}|jd }|jd }d|krd S |jd |
jd kr*d| _nd| _n,|jd }||krJd S d|krXd S d| _nZ|\}}}}|jd }||krd S d|krd S |jd }d|krd S |jd }d| _| j|dddgdddg}|d krd S |\}}}d }d }| jdkr| j|ddddd	d	gddddddg}|d kr8d S |d }|d j dkrTd S | j|\}}|dkrt|| _| j	|d jd }n^| j|ddgddg}|d k	r|jd }n0| j|ddgddg}|d krd S |jd }d }d }d }| jdkr| j|dddgdddg}|d k	r|\} }!}|!jd }||!jd  }"|"D ].}#| j
|#jd }$|$d k	rN|$j} q~qN|d krd S d|krd S nP| j|dgdg}|d krd S |d } | jd }||krd S d|krd S n| j|ddddgddddgfdddddgdddddgfg|\}%}}d }&d }"|d k	r$|d |d  }'}!|!jd }|%dkr||'jd  }&|&jd }n
|'jd }||krd S d|krd S |%dkr||'jd  }"|"D ].}#| j
|#jd }$|$d k	r|$j} qqn
|'jd }|d krd S d|krd S n| j|dddgdddg}|d krNd S |\}}!}|!jd }||!jd  }"|"D ].}#| j
|#jd }$|$d k	rt|$j} qqt|d krd S d|krd S | j|dddgdddg}(|(d krd S |(\})}*}+|+jd |
jd krd S | |*\},}-| jdkrB|d k	rB|}|}d }d }| |+jd |||||||jd |||,|-}.|.d kr|d S | j|. | j| j|.j< | j|dd   | j| | j|d d  |d k	r| j|d d  | j|(d d  d| _d S )Nr   r   rd   re   rf   r   r   rg   rh   ri   rj   rk   rZ   Zpast_value_crossZpast_value_selfZpresent_value_selfrl   rm   rn   ro   rp   Slicerq   Zpresent_key_crossZpast_key_crossZpast_key_selfZpresent_key_selfT)rr   r   rs   r#   r$   r   rt   r4   r   ru   Zfind_graph_outputr,   Zmatch_parent_pathsrv   r\   rw   rF   rE   rx   ry   rJ   rz   )/r   r`   ra   rb   r{   r|   r}   r~   r   r   r   rU   rX   rZ   r   Ztranspose_vr   r   Zconcat_vr   r   r   r   rV   r   r   r   r   rT   rW   rY   r   Ztranspose_kr   Zpresent_key_transpose_nodesZpresent_key_transpose_nodeZpresent_key_candidateidxZpast_key_transpose_nodeZconcat_kr   r   r   r   r   r   r   r   r   r   r_   X  s   





























































z!FusionT5Attention.fuse_t5_decoder)N)__name__
__module____qualname____doc__r   intr   r   strr
   r   rL   r   rR   r\   rc   r^   r_   __classcell__r   r   r   r   r      sJ    
o
A|r   c                       s,   e Zd Zeed fddZdd Z  ZS )FusionRelativePositionBiasBlock)r   max_distancec                    s$   t  |dddg || _d| _d S )Nrq   r   r   F)r   r   r   is_bidirectional)r   r   r   r   r   r   r   ]  s    z(FusionRelativePositionBiasBlock.__init__c                 C   s  |j dkr|j dkrd S | j|ddddgddddg}|d krp| j|dddddgdddddg}|d krpd S |d	 }|d
 }|d }| j|dddddddddg	d	ddddddddg	}|d krd S |d
 }	| j|	ddddddddgddddddddg}
|
d krB| j|	dddddgdddddg}
d| _|
d krBd S |
d
 }| j| | j| | j|
 | jrzdnd}| j|jd }t	|}t
|}tj| jjd|dtjt
|d t
|d g| dd}| j|| j |j|jd |jd g}|jd g}tjd||| jjd|dd}d|_|jtd| jg |jtd | jg | j| | j| j|j< d S )!Nr   r   rh   rf   ri   ZWherer   r      rk   ZMinZConstantOfShaperj   ro   rm   DivLogZNegrn   RangeZAbsTencoderdecoderZbias_table_weight)Zname_prefixr+   rq   r0   r3   r   r   )rr   r   rs   r   ry   rJ   r8   r#   r	   r:   r=   Z	transposer   rA   r@   r   rB   r;   rC   rD   rE   r,   r$   rG   rH   rI   rK   r   rw   rF   rx   )r   nodera   rb   Zcompute_bias_nodesZgatherwhereZ	unsqueezeZcompute_buckets_nodesdivZrange_nodesZ
range_nodeZnode_name_prefixZtable_weight_iZtable_weightZtable_weight_tZ
bias_tabler1   r2   rpb_noder   r   r   rc   c  s     
 
  
  


z$FusionRelativePositionBiasBlock.fuse)r   r   r   r   r   r   rc   r   r   r   r   r   r   \  s   r   c                       sT   e Zd Z fddZdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
  ZS )T5OnnxModelc                    sT   t  ||| t| | _t| | j| j| j| _t| | _	t
| | _t| d| _d S )N   )r   r   r   r   r   r   r   attention_fusionr   layer_norm_fusionr   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r   r   r   r     s    


zT5OnnxModel.__init__c                 C   s   | j   d S r]   )r   applyr   r   r   r   fuse_attention  s    zT5OnnxModel.fuse_attentionc                 C   s   | j   d S r]   )r   r   r   r   r   r   fuse_layer_norm  s    zT5OnnxModel.fuse_layer_normc                 C   s   | j   d S r]   )r   r   r   r   r   r   fuse_skip_layer_norm  s    z T5OnnxModel.fuse_skip_layer_normc                 C   s   g }|   D ]}|jdkr| |dddddddddd	d
gdddddddddddg}|d kr^q| |dgdg}|d krzq|d }|jd |jd< || || | | qd S )Nr   rm   rn   rh   ro   LessOrEqualTilerg   ri   rj   r   r   rq   Znodesrr   rs   r$   rJ   rF   Zremove_nodesr   ry   r   Zextended_mask_nodesr   r   r   r   r   !remove_extended_mask_decoder_init  s:    


z-T5OnnxModel.remove_extended_mask_decoder_initc                 C   s   g }|   D ]}|jdkr| |ddddddddddd	d
gddddddddddddg}|d krbq| |ddgddg}|d krq|d }|jd |jd< || || | | qd S )Nr   rm   rn   rh   rg   ro   r   r   ri   rj   r   r   r   rq   r   r   r   r   r   remove_extended_mask_decoder  s<    


z(T5OnnxModel.remove_extended_mask_decoderc                 C   s   |    | j  d S r]   )Zadjust_reshape_and_expandr   r   r   r   r   r   
preprocess  s    zT5OnnxModel.preprocessc                 C   s   |    |   |   d S r]   )r   r   rz   r   r   r   r   postprocess  s    zT5OnnxModel.postprocess)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s   
#$r   )loggingtypingr   r   numpyr=   Zfusion_attentionr   r   Zfusion_baser   Zfusion_simplified_layernormr   r   Zfusion_utilsr	   Zonnxr
   r   r   Z
onnx_modelr   Zonnx_model_bertr   	getLoggerr   r6   r   r   r   r   r   r   r   <module>   s"   
    LV