U
    T?h@                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZm	Z	m
Z
mZmZ d dlmZmZ d dlmZmZ d dlmZ eeZG dd dZG d	d
 d
eZG dd deZG dd dZdd Zdd Zdd Zedkre  dS )    N)ListUnion)AttentionInputIDsAttentionOutputIDsMultiHeadAttentionInputIDsMultiHeadAttentionOutputIDs	Operators)helper
load_model)	NodeProto	OnnxModel)SymbolicShapeInferenceHelperc                   @   s   e Zd ZeedddZeedf dddZeedf ddd	Z	eedf dd
dZ
edddZee ee ddddZee ee ddddZeeddddZeedf dddZdeddddZdS )PackingAttentionBase)modelattention_op_typec                 C   sD   || _ g | _g | _d| _i | _| j j jj| _|| _| j 	|| _
d S )NF)r   nodes_to_removenodes_to_addprune_graphnode_name_to_graph_namegraphnamethis_graph_namer   get_nodes_by_op_typeattention_nodes)selfr   r    r   b/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/convert_to_packing_mode.py__init__   s    zPackingAttentionBase.__init__Nreturnc                 C   sr   | j tjkrtjntj}|  }|r2t|j	|kr6d S |j	| }| j
D ]&}t|j	|ksf|j	| |krF d S qF|S N)r   r   	ATTENTIONr   Z
MASK_INDEXr   ZKEY_PADDING_MASK_try_getting_first_attentionleninputr   )r   Z
mask_indexfirst_attention_nodeattention_masknoder   r   r   _try_getting_attention_mask%   s    


z0PackingAttentionBase._try_getting_attention_maskc                 C   s   t | jdkrd S | jd S )Nr   )r#   r   r   r   r   r   r"   9   s    z1PackingAttentionBase._try_getting_first_attentionc                 C   s4   d }| j  D ] }|jtjks*|jtjkr|}q|S r    )r   Znodesop_typer   Z	LAYERNORMZSKIPLAYERNORM)r   last_layernorm_noder'   r   r   r   _try_getting_last_layernorm?   s
    z0PackingAttentionBase._try_getting_last_layernormc                 C   s
   t  d S r    NotImplementedErrorr)   r   r   r   _are_attentions_supportedF   s    z.PackingAttentionBase._are_attentions_supported)inputsoutputsr   c                 C   sB   t jtj||| jtjd}d|_| j| | j	| j
|j< d S Nr0   r1   r   com.microsoft)r	   	make_noder   ZREMOVEPADDINGr   create_node_namedomainr   appendr   r   r   r   r0   r1   new_noder   r   r   _insert_removepadding_nodeI   s    z/PackingAttentionBase._insert_removepadding_nodec                 C   sB   t jtj||| jtjd}d|_| j| | j	| j
|j< d S r2   )r	   r5   r   ZRESTOREPADDINGr   r6   r7   r   r8   r   r   r   r9   r   r   r   _insert_restorepadding_nodeU   s    z0PackingAttentionBase._insert_restorepadding_nodetoken_offsetcumulative_sequence_lengthr   c                 C   s
   t  d S r    r-   )r   r>   r?   r   r   r   )_replace_attention_with_packing_attentiona   s    z>PackingAttentionBase._replace_attention_with_packing_attentionc                 C   s   | j tjkr|jtj S d S r    )r   r   r!   r$   r   INPUT)r   r%   r   r   r   _get_input_to_remove_paddingd   s    z1PackingAttentionBase._get_input_to_remove_paddingTuse_symbolic_shape_inferr   c                 C   s  t d |  sd S |  }|s&d S |  }|  }|s>d S | |}|sPd S |d }|d }|d }|d }	| ||g||||	g | j	|| t d |j
d d }
| |
|g|j
d g | j|j
d |
 t d	|j d
 | || t d| j d| j  | j| j | j| j| j | jrH| j  n| jsX| jrb| j  | j  |rt| jjdd}|j| jjddd}|r|| j_d S )Nz$start converting to packing model...Z_no_paddingZ_token_offsetZ_cumulated_seq_lenZ_max_seq_lenz'inserted RemovePadding before Attentionr   Z_restore_inputz#inserted RestorePadding after last z layerz	replaced z with PackedverboseTF)Z
auto_mergeZguess_output_rank)loggerdebugr/   r(   r"   r,   rB   r;   r   Zreplace_input_of_all_nodesoutputr<   Zreplace_output_of_all_nodesr*   r@   r   Zremove_nodesr   Z	add_nodesr   r   r   Zupdate_graphZclean_shape_inferr   Zinfer_shapes)r   rD   r&   r%   r+   Zinput_to_remove_paddingZoutput_without_paddingr>   Zcumulated_seq_lenZmax_seq_lenZrestorepadding_inputshape_infer_helperZinferred_modelr   r   r   converti   sR    





zPackingAttentionBase.convert)T)__name__
__module____qualname__r   strr   r   r(   r   r"   r,   boolr/   r   r;   r<   r@   rB   rK   r   r   r   r   r      s   
r   c                       sB   e Zd Zed fddZedddZeeddd	d
Z  Z	S )PackingAttentionr   c                    s   t  |tj d S r    )superr   r   r!   r   r   	__class__r   r   r      s    zPackingAttention.__init__r   c                 C   s   | j D ]}t|dd k	r  dS t|dd k	r6 dS t|d}|d k	rX|dkrX dS t|jtjkrz|jtj sz dS t|jtjkr|jtj s dS qdS )NZpast_present_share_bufferFZ	do_rotaryZunidirectionalr   T)r   r   Zget_node_attributer#   r$   r   ZPASTZPAST_SEQUENCE_LENGTH)r   r'   Zunidirection_attrr   r   r   r/      s     

z*PackingAttention._are_attentions_supportedNr=   c              	   C   s   | j D ]}t|jtjkr&|jtj nd}tjtj|jtj	 |jtj
 |jtj |||g|jtj g| jtjd}g }|jD ]}|jdkr|| q|j| d|_| j| | j| | j| j|j< qtdt| j  d S )N r3   )	num_headsZqkv_hidden_sizesscaler4   z0Converted %d Attention nodes to PackedAttention.)r   r#   r$   r   RELATIVE_POSITION_BIASr	   r5   r   ZPACKEDATTENTIONrA   ZWEIGHTSBIASrI   r   OUTPUTr   r6   	attributer   r8   extendr7   r   r   r   r   rG   info)r   r>   r?   Z	attentionrelative_pos_biasZpacked_attention
attributesattrr   r   r   r@      s6    





z:PackingAttention._replace_attention_with_packing_attention)
rL   rM   rN   r   r   rP   r/   rO   r@   __classcell__r   r   rU   r   rQ      s   rQ   c                       sx   e Zd Zed fddZeedddZeedddZe	d	d
dZ
eeddddZeedf d	ddZ  ZS )PackingMultiHeadAttentionrR   c                    s   t  |tj d S r    )rS   r   r   MULTI_HEAD_ATTENTIONrT   rU   r   r   r      s    z"PackingMultiHeadAttention.__init__)indexr   c                 C   sD   t |j|kr@t |j| dkr@td| d| d|  dS dS )'Check a node does not have given input.r   znode input  (0) is not supported in PackedMultiHeadAttention: FT)r#   r$   rG   errorr   r'   rf   r   r   r   r   _check_empty_input   s
    z,PackingMultiHeadAttention._check_empty_inputc                 C   sD   t |j|kr@t |j| dkr@td| d| d|  dS dS )rg   r   znode output rh   ri   FT)r#   rI   rG   rj   rk   r   r   r   _check_empty_output   s
    z-PackingMultiHeadAttention._check_empty_outputr   c                 C   s   | j D ]}|jD ].}|jdkrtd|j d|    dS q|jtj rh|jtj shtd  dS | 	|tj
dr| 	|tjdr| |tjdr| |tjds dS qdS )	NrX   Zmask_filter_valuerY   znode attribute z/ is not supported in PackedMultiHeadAttention: Fz=packed kv format is not supported in PackedMultiHeadAttentionZpast_keyZpresent_keyT)r   r]   r   rG   rj   r$   r   KEYVALUErl   ZPAST_KEYZ
PAST_VALUErm   r   ZPRESENT_KEYZPRESENT_VALUE)r   r'   rb   r   r   r   r/      s$    




z3PackingMultiHeadAttention._are_attentions_supportedNr=   c           
   
   C   sJ  d}| j D ]}t|jtjkr,|jtj nd}tjtj|jtj	 |jtj
 |jtj |jtj |||g|jtj g| jtjd}g }|jD ]}|jdkr|| q|j| d|_| j| | j| | j| j|j< |r
| j|tj}	|	r
|	jdkr
t|	jdkr
|	j| |d7 }q
td	t| j  td
| d S )Nr   rW   r3   rn   r4   ZGatedRelativePositionBias      zBConverted %d MultiHeadAttention nodes to PackedMultiHeadAttention.z=Converted %d GatedRelativePositionBias nodes to packing mode.)r   r#   r$   r   rZ   r	   r5   r   ZPACKED_MULTI_HEAD_ATTENTIONZQUERYro   rp   r[   rI   r   r\   r   r6   r]   r   r8   r^   r7   r   r   r   r   
get_parentr*   rG   r_   )
r   r>   r?   Zgated_relative_pos_bias_countZmhar`   Z
packed_mhara   rb   Zrel_pos_bias_noder   r   r   r@     sP    



	


zCPackingMultiHeadAttention._replace_attention_with_packing_attentionc                 C   s*   | j |d}|r&|jdkr&|jd S d S )Nr   ZMatMul)r   rs   r*   r$   )r   r%   matmulr   r   r   rB   5  s    
z6PackingMultiHeadAttention._get_input_to_remove_padding)rL   rM   rN   r   r   intrO   rl   rm   rP   r/   r@   r   rB   rc   r   r   rU   r   rd      s   0rd   c                   @   s,   e Zd ZedddZd	eddddZdS )
PackingModerR   c                 C   s
   || _ d S r    rR   rT   r   r   r   r   >  s    zPackingMode.__init__TNrC   c                 C   sr   | j tjr>| j tjr*td d S t| j }||S | j tjr`t	| j }||S td d S d S )NzRPacking mode does not support both Attention and MultiHeadAttention in same graph.zPPacking mode requires either Attention or MultiHeadAttention node in onnx graph.)
r   r   r   r!   re   rG   rj   rQ   rK   rd   )r   rD   Zpackingr   r   r   rK   A  s    





zPackingMode.convert)T)rL   rM   rN   r   r   rP   rK   r   r   r   r   rv   =  s   rv   c                  C   sx   t jdd} | jddtdd | jddtdd | jd	d
ddd | jd
d | jdd
ddd | jd
d |  }|S )Nz_Convert to packing mode tool for ONNX Runtime. It converts BERT like model to use packing mode.)descriptionz--inputTzinput onnx model path)requiredtypehelpz--outputzoptimized onnx model pathz	--verboseF
store_truezshow debug information.)rx   actionrz   rE   z--use_external_data_formatz4use external data format to store large model (>2GB)use_external_data_format)argparseArgumentParseradd_argumentrO   set_defaults
parse_args)parserargsr   r   r   _parse_argumentsP  s     r   c                 C   s$   | rt jddd nt jdd d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(funcName)20s: %(message)s)r   )coloredlogsinstallrE   r   r   r   _setup_loggerh  s    r   c                  C   s|   t  } t| j td|   tj| jtj| j	krFt
d t| j}tt|}|  |jj| j	| jd d S )Nz
arguments:zYSpecified the same input and output path. Note that this may overwrite the original modelr}   )r   r   rF   rG   rH   ospathrealpathr$   rI   warningr
   rv   r   rK   r   Zsave_model_to_filer~   )r   r   Zpacking_moder   r   r   mainr  s    


r   __main__)r   loggingr   typingr   r   r   	constantsr   r   r   r   r   Zonnxr	   r
   Z
onnx_modelr   r   rJ   r   	getLoggerrL   rG   r   rQ   rd   rv   r   r   r   r   r   r   r   <module>   s&   
 
9a
