U
    T?hN                     @  sp  d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	m
Z
 ddddd	Zd;dddddddddZd<dddddddddddZd=ddddddddddddddZdddddddddddZd>ddddddddZd d!d"d#Zd?d%dddd&d'd(Zd%ddd)d*d+Zd,d%d-d.d/Zd,d%dddd%d0d1d2Zd,d%d%ddd3d4d5Zdd6dd7ddddd8d9d:ZdS )@    )annotationsN)
AutoConfigAutoTokenizer)InferenceSessionOrtValueztorch.Tensorbool)attention_maskuse_past_kvc                 C  s@   |   dd }|| dkd |r<|d d df d}|S )N   r   )longZcumsumZmasked_fill_	unsqueeze)r   r	   position_ids r   d/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_ids   s
    r   ptFr   ztorch.deviceintstr)configdevice
batch_sizeseq_lenenginereturn_dictc           
      C  s   t jd| j||ft jd}t j||t jd}t|dd}|dkrH| n||}|dkrb| n||}|dkr|| n||}|s|||fS |||d}	|	S )Nr   lowhighsizedtyper   Fr	   ort	input_idsr   r   )torchrandint
vocab_sizeint64onesr   numpyto)
r   r   r   r   r   r   r$   r   r   inputsr   r   r   get_sample_inputs   s    
r-   r   )r   r   r   past_seq_lenuse_fp16r   r   
world_sizec                   s.  t jd| j|dft jd}t j||d t jd}	t|	dd}
t| ||||d}|dkr^| n| }|dkrx|	 n|	 }	|dkr|
 n|
 }
|dkrt	|nt
t fd	d
|}|st|t
st||	|
|fS ||	|
d}|dkrt|tst|| nt|t
s"t||d< |S )Nr   r   r   r    Tr!   r0   r"   c                   s   | d   | d   fS Nr   r   r+   kvr   r   r   <lambda>Y       z0get_sample_with_past_kv_inputs.<locals>.<lambda>r#   past_key_values)r%   r&   r'   r(   r)   r   get_past_kv_inputsr*   r+   flatten_past_kv_inputslistmap
isinstanceAssertionErrordictupdate)r   r   r   r.   r/   r   r   r0   r$   r   r   past_kvr,   r   r6   r   get_sample_with_past_kv_inputsB   s0    


rC   )r   r   r   r   r.   max_seq_lenr/   use_buffer_sharer   r   r0   c                   sD  t jd| j||ft jd}t j||| t jd}t||dkd}t| ||||
d}|dkrb| n| }|dkr|| n| }|dkr| n| }|dkrt	|nt
t fdd|}|	st|t
st||||fS |||d	}|dkr(t|ts
t|| |r@t|||}nt|t
s8t||d
< |S )Nr   r   r    r!   r1   r"   c                   s   | d   | d   fS r2   r3   r4   r6   r   r   r7      r8   z7get_merged_sample_with_past_kv_inputs.<locals>.<lambda>r#   r9   )r%   r&   r'   r(   r)   r   r:   r*   r+   r;   r<   r=   r>   r?   r@   rA    enable_past_present_share_buffer)r   r   r   r   r.   rD   r/   rE   r   r   r0   r$   r   r   rB   r,   r   r6   r   %get_merged_sample_with_past_kv_inputsz   s4    


rG   )r   r   r.   r   rD   r/   rE   split_kvc                 C  sp  |r
t jnt j}| j| j }	|st j||| j|dt jt 	|||fdd |t j|| j
|| j|	|t j|| j
|| j|	|t j|t jdd}
nt j||| j|t jt j	|||ft jdddd t jt j|t jdd}
t| j
D ]T}|
d| dt j|| j||	|d	| dt j|| j||	|i q|rlt|
||}
|
S )
Ng     r   )kr    )x	attn_maskZk_cacheZv_cachepos)rJ   rK   rL   Zk__cacheZv_)npfloat16float32hidden_sizenum_attention_headsrandomrandZastypeZtriur)   num_hidden_layersarrayr(   int32rangerA   rF   )r   r   r.   r   rD   r/   rE   rH   Znp_dtype	head_size
ort_inputsir   r   r   get_msft_sample_inputs   sp    
"        $
    
    r\   )r   r   r.   r/   r0   c                   sZ   | j | t| dr| jn
| j| j |r0tjntj fddt| j	D }|S )Nhead_dimc              
     s4   g | ],}t j d t j d fqS )r    )r%   rT   ).0_r   rY   	num_headsr.   torch_dtyper   r   
<listcomp>   s   z&get_past_kv_inputs.<locals>.<listcomp>)
num_key_value_headshasattrr]   rQ   rR   r%   rO   rP   rX   rU   )r   r   r.   r/   r0   rB   r   r`   r   r:      s    
r:   z'list[tuple[torch.Tensor, torch.Tensor]])r9   c                 C  sV   i }t | D ]D\}\}}|   |d| d< |   |d| d< q|S )Npast_key_values..key.value)	enumeratedetachcpur*   )r9   rB   r[   Zpast_kZpast_vr   r   r   r;      s
    r;      r@   )	pt_inputsrE   r.   rD   c                 C  sl   i }|   D ]J\}}t|tjr*|||< q|dkrB|t| q|   ||< q|rht	|||}|S )Nr9   )
itemsr>   rN   ZndarrayrA   r;   rj   rk   r*   rF   )rm   rE   r.   rD   rZ   rI   vr   r   r   convert_inputs_for_ort   s    
rp   )rZ   r.   rD   c           
      C  st   |   D ]f\}}d|ks d|kr|j\}}}}tj||||f|jd}	||	d |d |d |d |f< |	| |< q| S )Ncacher9   r    )rn   shaperN   zerosr   )
rZ   r.   rD   rI   ro   r   ra   r_   rY   new_vr   r   r   rF     s     
rF   r   )modelrZ   c                 C  sl   t tdd |  }t | }|| }t|rHtd|  td|| }t|rh|D ]
}||= q\|S )Nc                 S  s   | j S Nname)Zmodel_inputr   r   r   r7   $  r8   z#verify_ort_inputs.<locals>.<lambda>z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.)setr=   
get_inputskeyslenprint	Exception)ru   rZ   model_inputsZuser_inputsZmissing_inputsZunnecessary_inputsZunnecessary_inputr   r   r   verify_ort_inputs"  s    r   )ru   rZ   r   	device_idrE   kv_cache_ortvaluesc                 C  s"  |   }ttdd |  }| D ]\}}	||kr8q&|rd|ksLd|kr||krztj|	||d}
|||
 |
||< q|| |	 ||||  q&tj|	||d}
|||
 q&| 	 D ]X}|j
}|rd|ksd|kr|dddd}||||  q|j|||d q||fS )Nc                 S  s   | j S rv   rw   )r[   r   r   r   r7   @  r8   z.add_io_bindings_as_ortvalues.<locals>.<lambda>rq   r9   )device_typer   outpresent)
io_bindingry   r=   rz   rn   r   Zortvalue_from_numpyZbind_ortvalue_inputZupdate_inplaceget_outputsrx   replaceZbind_ortvalue_outputbind_output)ru   rZ   r   r   rE   r   r   r   rI   ro   Zv_deviceoutputrx   Z
input_namer   r   r   add_io_bindings_as_ortvalues6  s*    
r   )ru   r,   outputsr/   rE   c              	   C  s   t | |}d }tjtjtjtjd}|  }| D ]P\}}	|j||	j	j
|	j	j
dkrXdn|	j	j|t|	j t|	j|	 d |	j	}q4|  D ]l}
|
j}|rd|kr||dd n|| }	|j||j
|j
dkrdn|j|rtjntjt|	j|	 d q|S )N)ztorch.int32ztorch.int64ztorch.float16ztorch.float32rk   r   )rx   r   r   Zelement_typerr   Z
buffer_ptrr   r9   )r   rN   rW   r(   rO   rP   r   rn   Z
bind_inputr   typeindexreprr   tuplerr   Zdata_ptrr   rx   r   r   )ru   r,   r   r/   rE   r   Zpt_to_npr   rI   ro   r   rx   r   r   r   add_io_bindings_as_tensorsc  sF    
	r   r   z	list[str])r   	tokenizerrequested_lengthpromptr   r/   rE   r   c              	   C  s  |j |_|j|dd}|r tjntj}	tj|d |tjd}
tj|d |tjd}t|dd}|
j	d }||kr|
d d d |f }
|d d d |f }t|dd}nt||k r|
d d d	f 
d	j}|d d d	f 
d	j}t|| D ] }t||
f}
t||f}qt|dd}|
j	d }||ks2t|d
krD|
 n|
|d
krX| n||d
krl| n|d}|d
krg |d< |
j	\}}| j}| j}t| dr| jn
| j| j }t| jD ]}tj|||r|nd	|||	d}tj|||r|nd	|||	d}|d
kr>|d| d| d| d| i n|d ||f qd }|d
krtj||| j||	d}d| i}|st| jD ]\}tj||||||	d}tj||||||	d}|d| d| d| d| i q||fS )NT)paddingr$   )r   r   r   Fr!   r
   r   r"   r#   r9   r]   rf   rg   rh   logitszpresent.)Z	eos_tokenZ	pad_tokenZbatch_encode_plusr%   rO   rP   Ztensorr(   r   rr   r   TrX   Zhstackr?   
contiguousZmax_position_embeddingsrd   re   r]   rQ   rR   rU   rs   rA   appendr'   )r   r   r   r   r   r/   rE   r   Zencodings_dictrb   r$   r   r   Ztokenized_lengthZinput_ids_first_colZattention_mask_first_colr_   r,   r   Zsequence_lengthZmax_sequence_lengthra   rY   r[   Zpast_keyZ
past_valuer   r   Zpresent_keyZpresent_valuer   r   r   get_initial_inputs_and_outputs  s    







 
 
          "r   )r   F)Fr   Fr   )FFr   Fr   )r   )Fr   rl   )
__future__r   r*   rN   r%   Ztransformersr   r   Zonnxruntimer   r   r   r-   rC   rG   r\   r:   r;   rp   rF   r   r   r   r   r   r   r   r   <module>   s>     (    ?     $65   -0