U
    T?hU/  ã                   @   sp   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZ d dlmZ eeƒZG dd„ deƒZdS )	é    )Ú	getLogger)ÚTupleÚUnionN)ÚFusion)Ú	NodeProtoÚTensorProtoÚhelperÚnumpy_helper)Ú	OnnxModelc                       sv   e Zd ZdZeeedœ‡ fdd„Zeeeeef dœdd„Z	eeeeeeeee
e
eedf d	œd
d„Zdd„ Z‡  ZS )ÚFusionAttentionVaezI
    Fuse Attention subgraph of Vae Decoder into one Attention node.
    )ÚmodelÚhidden_sizeÚ	num_headsc                    s.   t ƒ  |ddg¡ || _|| _d| _d| _d S )NÚ	AttentionÚSoftmaxT)ÚsuperÚ__init__r   r   Únum_heads_warningÚhidden_size_warning)Úselfr   r   r   ©Ú	__class__© ú_/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/fusion_attention_vae.pyr      s
    zFusionAttentionVae.__init__)Ú	reshape_qÚadd_qÚreturnc           	      C   s:  | j  |d¡}|dks$t|jƒdkr0| j| jfS | j  |jd ¡}|dk	r`t|tj	ƒr`|j
dksl| j| jfS t|ƒ}|dkrˆ| j| jfS | j  |¡\}}|dks¶t|tj	ƒr¶|jdkrÂ| j| jfS |jd }| jdkrü|| jkrü| jrüt d|| j¡ d| _| jdkr2|| jkr2| jr2t d|| j¡ d| _||fS )	zúDetect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
            add_q (NodeProto): add node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
        é   Né   é   r   z?Detected number of attention heads is %d. Ignore --num_heads %dFz3Detected hidden size is %d. Ignore --hidden_size %d)r   Z
get_parentÚlenÚinputr   r   Zget_constant_valueÚ
isinstanceÚnpZndarrayÚsizeÚintZget_constant_inputÚndimÚshaper   ÚloggerÚwarningr   )	r   r   r   ÚconcatÚvaluer   Ú_Zbiasr   r   r   r   Úget_num_heads_and_hidden_size   s4    

  ÿz0FusionAttentionVae.get_num_heads_and_hidden_sizeN)Úq_matmulÚq_addÚk_matmulÚk_addÚv_matmulÚv_addr   r   Ú
input_nameÚoutput_namer   c           %      C   sF  |j d |	ks*|j d |	ks*|j d |	krPt d|j d |j d |j d ¡ dS |dkrv|| dkrvt d||¡ dS | j |j d ¡}| j |j d ¡}| j |j d ¡}|r¸|r¸|s¼dS | j |j d ¡pÞ| j |j d ¡}| j |j d ¡p| j |j d ¡}| j |j d ¡p*| j |j d ¡}t |¡}t |¡}t |¡}t |j	¡}t |j	¡}t |j	¡}|j
dkrˆt d¡ dS t |¡}t |¡}t |¡}|j	|j	ksÂ|j	|j	krÆdS |j	d }|j	d }|j	d }||krø||ksüt‚|dkr&||kr&td|› d	|› d
ƒ‚t |j	dd… ¡}tj|||fdd}dt|ƒ }| j d¡} ||  kr~|ks„n t‚d}!tj|||fdd}"d| }!| j| d tj||g|d tjd|gtjd}"d| }!| j| d tj|!g|"d |	| d | d g}#tjd|#|
g| d}$d|$_|$j t d|¡g¡ |  d¡ |$S )at  Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input_name (str): input name
            output_name (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   zRFor self attention, input hidden state for q and k/v shall be same. Got %s, %s, %sNz9input hidden size %d is not a multiple of num of heads %dr   é
   zBweights are in fp16. Please run fp16 conversion after optimizationzInput hidden size (z,) is not same as weight dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)Zaxisé   r   Z_qkv_weight)ÚnameÚ	data_typeZdimsÚvals)ZdtypeZ	_qkv_bias)ÚinputsÚoutputsr8   zcom.microsoftr   zAttention (self attention))r!   r(   Údebugr   Zget_initializerr	   Zto_arrayr#   Úprodr'   r9   ÚAssertionErrorÚ
ValueErrorÚstackr%   Zcreate_node_nameZadd_initializerr   ÚFLOATZzerosZfloat32r   Z	make_nodeÚdomainÚ	attributeÚextendZmake_attributeZincrease_counter)%r   r.   r/   r0   r1   r2   r3   r   r   r4   r5   Zq_weight_tensorZk_weight_tensorZv_weight_tensorZq_bias_tensorZk_bias_tensorZv_bias_tensorZq_biasZk_biasZv_biasZq_bias_shapeZk_bias_shapeZv_bias_shapeZq_weightZk_weightZv_weightZ
qw_in_sizeZ
kw_in_sizeZ
vw_in_sizeZqw_out_sizeZ
qkv_weightZqkv_weight_dimZattention_node_nameZqkv_bias_dimZqkv_biasZattention_inputsZattention_noder   r   r   Úcreate_attention_nodeG   sš    *ü$&&









ÿüüýü
z(FusionAttentionVae.create_attention_nodec                  C   s   | j j|d|dd}|d kr d S | j j|d|dd}|d kr@d S | j j|d|dd}|d kr`d S | j j|d|dd}|d kr€d S | j j|d|dd}|d kr d S | j j|d|dd}	|	d krÀd S | j j|	d|dd}
|
d kràd S | j  |dddddgddddd g¡}|d krt d	¡ d S |\}}}}}| j  |d
dddgddddg¡}|d k	rb|\}}}}nt d¡ d S | j  |dddddgddddd g¡}|d kr¬t d¡ d S |\}}}}}| j  |ddddddgdddddd g¡}|d krút d¡ d S |\}}}}}}|}|  ||¡\}}|dkr6t d¡ d S |  |||||||||jd |jd ¡
}|d krld S | j	 
|¡ | j| j|j< | j ||g¡ d| _d S )NZMatMulF)Ú	recursiveZReshapeZ	TransposeÚAddr   r   z&fuse_attention: failed to match v pathr   ZMulz'fuse_attention: failed to match qk pathz&fuse_attention: failed to match q pathz&fuse_attention: failed to match k pathz*fuse_attention: failed to detect num_headsT)r   Zfind_first_child_by_typeZmatch_parent_pathr(   r=   r-   rF   r!   ÚoutputZnodes_to_addÚappendZthis_graph_nameZnode_name_to_graph_namer8   Znodes_to_removerE   Zprune_graph) r   Zsoftmax_nodeZinput_name_to_nodesZoutput_name_to_nodeZ
matmul_qkvZreshape_qkvZtranspose_qkvZreshape_outZ
matmul_outZadd_outZtranspose_outZv_nodesr,   Zadd_vZmatmul_vZqk_nodesZ_softmax_qkZ	_add_zeroZ_mul_qkZ	matmul_qkZq_nodesZ_transpose_qr   r   Zmatmul_qZk_nodesZadd_kZmatmul_kZattention_last_nodeZq_num_headsZq_hidden_sizeÚnew_noder   r   r   ÚfuseÑ   s¬       ÿ   ÿ  ÿ

 

  ÿ

  ÿ



ö
zFusionAttentionVae.fuse)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r
   r%   r   r   r   r-   Ústrr   rF   rL   Ú__classcell__r   r   r   r   r      s"   	+
ô r   )Úloggingr   Útypingr   r   Únumpyr#   Zfusion_baser   Zonnxr   r   r   r	   Z
onnx_modelr
   rM   r(   r   r   r   r   r   Ú<module>   s   