U
    T?hfA  ã                   @   s€   d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZmZ d dlmZ eeƒZG d	d
„ d
e	ƒZdS )é    )Ú	getLogger)ÚTupleN)ÚAttentionMask)ÚFusion)ÚFusionUtilsÚNumpyHelper)Ú	NodeProtoÚhelper)Ú	OnnxModelc                       sH   e Zd Zeeeedœ‡ fdd„Zeeeef dœdd„Z	dd„ Z
‡  ZS )	ÚFusionQOrderedAttention)ÚmodelÚhidden_sizeÚ	num_headsÚattention_maskc                    s&   || _ || _|| _tƒ  |dd¡ d S )NÚQOrderedAttentionZQOrderedLayerNormalization)r   r   r   ÚsuperÚ__init__)Úselfr   r   r   r   ©Ú	__class__© úd/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/fusion_qordered_attention.pyr      s    z FusionQOrderedAttention.__init__)Ú	reshape_qÚreturnc                 C   sr  | j  |jd ¡}|dkr†t |jd › d¡ | j  |dgdg¡}|dkrX| j| jfS |d }t|j	ƒdkrz| j| jfS |j	d j
}t |¡}t|ƒdks´|d dks´|d dkrÒt d	|› d
¡ | j| jfS |d }|d }|| }| jdkr*|| jkr*| jr*t d| j› d|› d¡ d| _| jdkrj|| jkrj| jrjt d| j› d|› d¡ d| _||fS )zÊDetect num_heads and hidden_size from a reshape node.
        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        é   Nz is not initializer.ÚConstantr   é   é   é   zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   Úget_initializerÚinputÚloggerÚdebugÚmatch_parent_pathr   r   ÚlenÚ	attributeÚtr   Úto_arrayZnum_heads_warningÚwarningZhidden_size_warning)r   r   Zq_shapeZconstant_nodeZq_shape_valuer   Z	head_sizer   r   r   r   Úget_num_heads_and_hidden_size!   s8    	
$ÿz5FusionQOrderedAttention.get_num_heads_and_hidden_sizec           B      C   sÞ  | j  |ddgddg¡}|d k	r*|d }nd S | j  |dgd g¡}|d krXt d¡ d S |d }| j  |dddd	dddgd d dddddg¡}|d kr¢t d
¡ d S |\}}	}
}}}}t || j ¡sÆd S t || j ¡sØd S g }t|jƒD ]2\}}||krøqæ||d jd krqæ| 	|¡ qæt
|ƒdkr,d S |d }| j  |d	dddddgdddddd g¡}|d krtt d¡ d S |\}}}}}}t || j ¡s˜d S t || j ¡s¬d S | j  |dgdg¡}|d krØt d¡ d S |d }| j  |jd ¡d krüd S t || j d¡sd S | j  |ddddddddgddddd dddg¡}|d krZt d¡ d S |\}}}}}}} }!t || j ¡s‚d S t || j ¡s–d S t | | j ¡sªd S t || j ¡s¾d S | j  |!d	dddddgdddddd g¡}"|"d krþt d¡ d S |"\}}#}$}%}&}'t |%| j ¡s"d S t |$| j ¡s6d S | j  |'dgdg¡}(|(d krbt d¡ d S |(d }(| j  |(jd ¡d kr†d S t |(| j d¡sœd S | j  |!d	dddddgdddddd g¡})|)d krÜt d¡ d S |)\}}}*}+},}-t |+| j ¡s d S t |*| j ¡sd S | j  |-dgdg¡}.|.d kr@t d¡ d S |.d }.| j  |.jd ¡d krdd S t |.| j d¡szd S | j  |dddddgd ddddg¡}/|/d kr¶t d¡ d S | j  |(jd ¡}0| j  |.jd ¡}1| j  |jd ¡}2t |0¡}3t |1¡}4t |2¡}5t |3jdd … ¡}6t |4jdd … ¡}7t |5jdd … ¡}8|jd |krÚ|'jd |krÚ|-jd |krÚ| j |/d jd ¡}9|  |#¡\}:};|jd g}<|< 	|jd ¡ |< 	|$jd ¡ |< 	|*jd ¡ |< 	|jd ¡ |< 	|(jd ¡ |< 	|.jd ¡ |< 	|jd ¡ |< 	|(jd ¡ |< 	|.jd ¡ |< 	|jd ¡ | j  |&jd ¡rl|< 	|&jd ¡ n|< 	|&jd ¡ | j  |,jd ¡r¢|< 	|,jd ¡ n|< 	|,jd ¡ | j  |jd ¡rØ|< 	|jd ¡ n|< 	|jd ¡ |< 	| jd ¡ |< 	|jd ¡ |< 	|jd ¡ |9d k	r.|< 	|9¡ n
|< 	d¡ | j  |(jd ¡}=t |=¡ | j  |.jd ¡}>t |>¡ | j  |jd ¡}?t |?¡ | j  d¡}@tjd|<|
jd g|@d}A| j  ||jd |Ajd ¡ | j  |	|	jd |jd ¡ |Aj t d|:¡g¡ |Aj t dd¡g¡ |Aj t dd¡g¡ |Aj t dd¡g¡ |Aj t d|6|7|8g¡g¡ d |A_| j 	|A¡ | j| j |Aj!< | j" |
|||g¡ | j" |¡ | j" |"¡ | j" |)¡ | j" |¡ | j" |(|.|g¡ d!| _#d S )"NZQuantizeLinearÚAddr   éÿÿÿÿZDequantizeLinearz=fuse_qordered_attention: failed to match input qdq nodes pathZMatMulZReshapeZ	Transposez1fuse_qordered_attention: failed to match qkv pathr   z/fuse_qordered_attention: failed to match v pathFZSoftmaxÚDivz0fuse_qordered_attention: failed to match qk pathz/fuse_qordered_attention: failed to match q pathz/fuse_qordered_attention: failed to match k pathZMulÚSubZCastZ	Unsqueezez8fuse_qordered_attention: failed to match mask_nodes pathÚ r   )ÚinputsÚoutputsÚnamer   Zorder_inputZorder_weightZorder_outputZqkv_hidden_sizeszcom.microsoftT)$r   r#   r!   r"   r   Zcheck_qdq_node_for_fusionÚ	enumerater    ÚoutputÚappendr$   Zget_constant_valuer   r   r'   ÚnpÚprodÚshaper   Zprocess_maskr)   Ztranspose_2d_int8_tensorZcreate_node_namer	   Z	make_nodeZreplace_node_inputr%   ÚextendZmake_attributeÚdomainZnodes_to_addZthis_graph_nameZnode_name_to_graph_namer1   Znodes_to_removeZprune_graph)Br   Znormalize_nodeZinput_name_to_nodesZoutput_name_to_nodeZadd_before_layernormZ
start_nodeZdequantize_inputZ	qkv_nodesÚ_Zprojection_matmulZreshape_qkvZtranspose_qkvZdequantize_qkvZquantize_qkvZ
matmul_qkvZother_inputsZ_ir    Z
root_inputZv_nodesZdequantize_vZ
quantize_vZadd_vZmatmul_vZdequantize_v_matmul_weightZqk_nodesZdequantize_qk_softmaxZquantize_qk_softmaxZ
softmax_qkZadd_qkZdiv_qkZdequantize_qkZquantize_qkZ	matmul_qkZq_nodesr   Zdequantize_qZ
quantize_qZadd_qZmatmul_qZdequantize_q_matmul_weightZk_nodesZdequantize_kZ
quantize_kZadd_kZmatmul_kZdequantize_k_matmul_weightZ
mask_nodesZq_weightZk_weightZv_weightZqwÚkwZvwZqw_out_sizeZkw_out_sizeZvw_out_sizeZ
mask_indexr   r   Zattention_inputsZq_weight_tensorZk_weight_tensorZv_weight_tensorZattention_node_nameZattention_noder   r   r   ÚfuseT   s¸   ý
ý
ý
ý



ø
ô

÷ý



ý



  ÿ




0





üÿÿzFusionQOrderedAttention.fuse)Ú__name__Ú
__module__Ú__qualname__r
   Úintr   r   r   r   r)   r<   Ú__classcell__r   r   r   r   r      s   û3r   )Úloggingr   Útypingr   Únumpyr5   Zfusion_attentionr   Zfusion_baser   Zfusion_utilsr   r   Zonnxr   r	   Z
onnx_modelr
   r=   r!   r   r   r   r   r   Ú<module>   s   