# onnxruntime/transformers/fusion_options.py
from argparse import ArgumentParser
from enum import Enum


class AttentionMaskFormat:
    # 1D mask index holding the end position of each sequence; requires right-side padding.
    MaskIndexEnd = 0

    # 1D mask index holding both end and start positions of each sequence.
    MaskIndexEndAndStart = 1

    # Raw 2D attention mask (0 means padding, 1 otherwise).
    AttentionMask = 2

    # No attention mask.
    NoMask = 3


class AttentionOpType(Enum):
    Attention = "Attention"
    MultiHeadAttention = "MultiHeadAttention"
    GroupQueryAttention = "GroupQueryAttention"
    PagedAttention = "PagedAttention"

    def __str__(self):
        return self.value

    def __hash__(self):
        return hash(self.value)

    def __eq__(self, other):
        return self.value == other.value


class FusionOptions:
    """Options of fusion in graph optimization"""

    def __init__(self, model_type):
        self.enable_gelu = True
        self.enable_layer_norm = True
        self.enable_attention = True
        self.enable_rotary_embeddings = True

        self.use_multi_head_attention = False
        self.disable_multi_head_attention_bias = False

        self.enable_skip_layer_norm = True
        self.enable_embed_layer_norm = True
        self.enable_bias_skip_layer_norm = True
        self.enable_bias_gelu = True
        self.enable_gelu_approximation = False
        self.enable_qordered_matmul = True

        self.enable_shape_inference = True
        self.enable_gemm_fast_gelu = False

        # Channels-last layout for fused GroupNorm; only relevant for unet/vae models.
        self.group_norm_channels_last = True

        if model_type == "clip":
            self.enable_embed_layer_norm = False

        # Default mask format is the raw 2D attention mask; BERT switches to the 1D mask
        # index (sequence lengths) to activate fused attention, and ViT uses no mask.
        self.attention_mask_format = AttentionMaskFormat.AttentionMask
        if model_type == "bert":
            self.attention_mask_format = AttentionMaskFormat.MaskIndexEnd
        elif model_type == "vit":
            self.attention_mask_format = AttentionMaskFormat.NoMask

        self.attention_op_type = None

        # Options that only apply to stable diffusion (unet/vae) models.
        if model_type in ["unet", "vae"]:
            self.enable_nhwc_conv = True
            self.enable_group_norm = True
            self.enable_skip_group_norm = True
            self.enable_bias_splitgelu = True
            self.enable_packed_qkv = True
            self.enable_packed_kv = True
            self.enable_bias_add = True
zFusionOptions.__init__Tc                 C   s   |rt j| _nt j| _d S r   )r   r   r/   r   )r   Zuse_raw_maskr   r   r   use_raw_attention_maskZ   s    

    def disable_attention_mask(self):
        self.attention_mask_format = AttentionMaskFormat.NoMask

    def set_attention_op_type(self, attn_op_type: AttentionOpType):
        self.attention_op_type = attn_op_type

    @staticmethod
    def parse(args):
        options = FusionOptions(args.model_type)
        if args.disable_gelu:
            options.enable_gelu = False
        if args.disable_layer_norm:
            options.enable_layer_norm = False
        if args.disable_rotary_embeddings:
            options.enable_rotary_embeddings = False
        if args.disable_attention:
            options.enable_attention = False
        if args.use_multi_head_attention:
            options.use_multi_head_attention = True
        if args.disable_skip_layer_norm:
            options.enable_skip_layer_norm = False
        if args.disable_embed_layer_norm:
            options.enable_embed_layer_norm = False
        if args.disable_bias_skip_layer_norm:
            options.enable_bias_skip_layer_norm = False
        if args.disable_bias_gelu:
            options.enable_bias_gelu = False
        if args.enable_gelu_approximation:
            options.enable_gelu_approximation = True
        if args.disable_shape_inference:
            options.enable_shape_inference = False
        if args.enable_gemm_fast_gelu:
            options.enable_gemm_fast_gelu = True
        if args.use_mask_index:
            options.use_raw_attention_mask(False)
        if args.use_raw_attention_mask:
            options.use_raw_attention_mask(True)
        if args.no_attention_mask:
            options.disable_attention_mask()
        if args.model_type in ["unet", "vae"]:
            if args.use_group_norm_channels_first:
                options.group_norm_channels_last = False
            if args.disable_nhwc_conv:
                options.enable_nhwc_conv = False
            if args.disable_group_norm:
                options.enable_group_norm = False
            if args.disable_skip_group_norm:
                options.enable_skip_group_norm = False
            if args.disable_bias_splitgelu:
                options.enable_bias_splitgelu = False
            if args.disable_packed_qkv:
                options.enable_packed_qkv = False
            if args.disable_packed_kv:
                options.enable_packed_kv = False
            if args.disable_bias_add:
                options.enable_bias_add = False
        return options
zFusionOptions.parseparserc                 C   s  | j ddddd | jdd | j ddddd | jdd	 | j d
dddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j dddd d | jdd! | j d"ddd#d | jdd$ | j d%ddd&d | jdd' | j d(ddd)d | jdd* | j d+ddd,d | jdd- | j d.ddd/d | jdd0 | j d1ddd2d | jdd3 | j d4ddd5d | jdd6 | j d7ddd8d | jdd9 | j d:ddd;d | jdd< | j d=ddd>d | jdd? | j d@dddAd | jddB | j dCdddDd | jddE | j dFdddGd d S )HNz--disable_attentionF
store_truezdisable Attention fusion)requiredactionhelp)r@   z--disable_skip_layer_normz%disable SkipLayerNormalization fusion)rA   z--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)rB   z--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)rC   z--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)rD   z--disable_layer_normz!disable LayerNormalization fusion)r?   z--disable_geluzdisable Gelu fusion)r>   z--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)r+   z--disable_shape_inferencez disable symbolic shape inference)rE   z--enable_gemm_fast_geluzenable GemmfastGelu fusion)r-   z--use_mask_indexzWuse mask index to activate fused attention to speed up. It requires right-side padding!)rF   z--use_raw_attention_maskzuse raw attention mask. Use this option if your input is not right-side padding. This might deactivate fused attention and get worse performance.)r:   z--no_attention_maskz1no attention mask. Only works for model_type=bert)rG   z--use_multi_head_attentionzUse MultiHeadAttention instead of Attention operator for testing purpose. Note that MultiHeadAttention might be slower than Attention when qkv are not packed. )r&   z--disable_group_normz9not fuse GroupNorm. Only works for model_type=unet or vae)rJ   z--disable_skip_group_normzPnot fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae)rK   z--disable_packed_kvz[not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet)rN   z--disable_packed_qkvz[not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet)rM   z--disable_bias_addz0not fuse BiasAdd. Only works for model_type=unet)rO   z--disable_bias_splitgeluz6not fuse BiasSplitGelu. Only works for model_type=unet)rL   z--disable_nhwc_convz:Do not use NhwcConv. Only works for model_type=unet or vae)rI   z--use_group_norm_channels_firstznUse channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae)rH   z--disable_rotary_embeddingsz5Do not fuse rotary embeddings into RotaryEmbedding op)add_argumentset_defaultsrS   r   r   r   add_arguments   s@   zFusionOptions.add_argumentsN)T)r   r	   r
   __doc__r9   r:   r;   r   r=   staticmethodrR   r   r[   r   r   r   r   r   (   s   /

5r   N)argparser   enumr   r   r   r   r   r   r   r   <module>   s   
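

# Usage sketch (illustrative only, not part of the original module): shows how
# FusionOptions is typically driven from a command line. The "--model_type" flag and
# this __main__ guard are assumptions added for demonstration; the real entry point,
# onnxruntime.transformers.optimizer, defines its own arguments around these helpers.
if __name__ == "__main__":
    demo_parser = ArgumentParser(description="FusionOptions demo")
    demo_parser.add_argument("--model_type", default="bert")  # assumed flag for this demo only
    FusionOptions.add_arguments(demo_parser)
    demo_args = demo_parser.parse_args()

    # Build fusion options from the parsed flags and inspect a couple of them.
    demo_options = FusionOptions.parse(demo_args)
    print("enable_attention:", demo_options.enable_attention)
    print("attention_mask_format:", demo_options.attention_mask_format)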