U
    T?hWB                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZmZ eeZG dd dZG dd dZG d	d
 d
ZG dd deZG dd dZdS )    N)OrderedDict)AnyDictListOptionalTupleUnion)InferenceSession
RunOptionsc                   @   s   e Zd ZeeeedddZeeedddZeeddd	Zeedd
dZ	ee
jdddZeejdddZeeeee
jf dddZdS )
TypeHelper)ort_sessionnamereturnc                 C   s>   t |  D ]\}}|j|kr|j  S qtd| dd S )Nzinput name 
 not found)	enumerate
get_inputsr   type
ValueError)r   r   _iinput r   \/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_type   s    
zTypeHelper.get_input_type)r   r   c                 C   s>   t |  D ]\}}|j|kr|j  S qtd| dd S )Nzoutput name r   )r   get_outputsr   r   r   )r   r   r   outputr   r   r   get_output_type   s    
zTypeHelper.get_output_type)ort_typec                 C   s6   t jt jt jt jtd}| |kr.t|  d||  S N)ztensor(int64)ztensor(int32)ztensor(float)ztensor(float16)ztensor(bool) not found in map)numpylonglongintcfloat32float16boolr   )r   Zort_type_to_numpy_type_mapr   r   r   ort_type_to_numpy_type   s    z!TypeHelper.ort_type_to_numpy_typec                 C   s8   t jt jt jt jt jd}| |kr0t|  d||  S r   )torchint64int32r"   r#   r$   r   )r   Zort_type_to_torch_type_mapr   r   r   ort_type_to_torch_type,   s    z!TypeHelper.ort_type_to_torch_type)
numpy_typec                 C   sP   t jtjt jtjt jtjt jtjt jtjttji}| |krHt	|  d||  S Nr   )
r   r    r&   r'   r!   r(   r"   r#   r$   r   )r*   Znumpy_type_to_torch_type_mapr   r   r   numpy_type_to_torch_type:   s           z#TypeHelper.numpy_type_to_torch_type)
torch_typec              
   C   sH   t jtjt jtjt jtjt jtjt jti}| |kr@t	|  d||  S r+   )
r&   r'   r   r    r(   r!   r"   r#   r$   r   )r-   Ztorch_type_to_numpy_type_mapr   r   r   torch_type_to_numpy_typeI   s         z#TypeHelper.torch_type_to_numpy_type)r   r   c                 C   sH   i }|   D ]}t|j||j< q|  D ]}t|j||j< q,|S )z:Create a mapping from input/output name to numpy data type)r   r   r%   r   r   r   )r   Zname_to_numpy_typer   r   r   r   r   get_io_numpy_type_mapW   s    z TypeHelper.get_io_numpy_type_mapN)__name__
__module____qualname__staticmethodr	   strr   r   r%   r)   r   dtyper,   r&   r.   r   r/   r   r   r   r   r      s   r   c                   @   sR   e Zd ZeedddZedejejejeej dddZ	edd	d
Z
dS )IOBindingHelper)r   c                 C   sJ   i }|  D ]8\}}t| |}t|}tjt|||d||< q|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)r5   device)itemsr   r   r)   r&   emptyr   prod)r   output_shapesr7   output_buffersr   shaper   r-   r   r   r   get_output_buffersd   s    
z"IOBindingHelper.get_output_buffersN)	input_idsposition_idsattention_maskpastc              
   C   s  |dkrt | }|  }| s&t|d|jjd|d t|	 |
  |dk	rt|D ]\\}	}
|
 srt|

 }|dkr|
 }|d|	 |
jjd|d|	  t|
	 | q^|dk	r| st|d|jjd|d t|	 |
  |dk	r8| st|d|jjd|d t|	 |
  |  D ]^}|j}|| }t| d|jj dt|	   |||jjd|| || |
  q@|S )	z)Returnas IO binding object for a session.Nr?   r   Zpast_rA   r@   z device type=z shape=)r   r/   
io_bindingis_contiguousAssertionError
bind_inputr7   r   listsizedata_ptrr   r   r   loggerdebugbind_output)r   r?   r@   rA   rB   r<   r;   Zname_to_np_typerC   iZpast_irI   r   output_nameZoutput_bufferr   r   r   prepare_io_bindingn   sv    

	
	
	


&	z"IOBindingHelper.prepare_io_bindingTc           
      C   sn   g }|   D ]\}|j}|| }|| }|dt| |  }	|r^||	   q||	 q|S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r   r   r:   Zreshapeclonedetachappendcpu)
r   r<   r;   Zreturn_numpyZort_outputsr   rN   bufferr=   Zcopy_tensorr   r   r   "get_outputs_from_io_binding_buffer   s     z2IOBindingHelper.get_outputs_from_io_binding_buffer)N)T)r0   r1   r2   r3   r	   r>   r&   Tensorr   rO   rU   r   r   r   r   r6   c   s   		 Ur6   c                   @   s   e Zd ZdZdeejdddZeedddZ	d	d
 Z
eejdddZeeeee ee f f dddZdeeejf eedddZedeeeeeef dddZdS )CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerF)r   r7   c                 C   sr   || _ dd | j  D | _dd | j  D | _t| j | _| j  | _|| _	t
 | _t
 | _|| _i | _d S )Nc                 S   s   g | ]
}|j qS r   r   ).0r   r   r   r   
<listcomp>   s     z(CudaSession.__init__.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   rX   )rY   r   r   r   r   rZ      s     )r   r   input_namesr   output_namesr   r/   io_name_to_numpy_typerC   enable_cuda_graphr   input_tensorsoutput_tensorsr7   buffer_sharing)selfr   r7   r^   r   r   r   __init__   s    zCudaSession.__init__)
input_namerN   c                 C   s4   || j kst|| jkst|| j|< || j|< d S N)r[   rE   r\   ra   )rb   rd   rN   r   r   r   set_buffer_sharing   s    
zCudaSession.set_buffer_sharingc                 C   s   | ` | `| `d S re   )r_   r`   rC   )rb   r   r   r   __del__   s    zCudaSession.__del__)r   tensorc              	   C   s   |j jd k	r|j jnd}t|jdkr,dgnt|j}| j||j j|| j| ||	  || j
kr| j| j
| |j j|| j| ||	  || j| j
| < d S )Nr      )r7   indexlenr=   rG   rC   rF   r   r]   rI   ra   rL   r`   )rb   r   rh   	device_idZtensor_shaper   r   r   bind_input_and_buffer_sharing   s(    	
z)CudaSession.bind_input_and_buffer_sharing)
shape_dictc              
   C   sR  | j r| D ]\}}|| jkr|| jkrLt| j| jt|krDqtd| j| }tj	t|t
|dj| jd}|| j|< | || q| D ]\}}|| jkr|| jkrt| j| jt|krq|| jkrq| j| }tj	t|t
|dj| jd}|| j|< | j||jj|jjdk	r4|jjnd|t| |  qdS )z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)r5   )r7   Nr   )r^   r8   r[   r_   tupler=   RuntimeErrorr]   r&   r9   r   r,   tor7   rm   r\   r`   ra   rC   rL   r   rj   rG   rH   rI   )rb   rn   r   r=   Znumpy_dtyperh   r   r   r   allocate_buffers	  s@    




"



zCudaSession.allocate_buffersNT)	feed_dictrun_optionssynchronizec                 C   s   |  D ]\}}t|tjr$| s(t|| jkr| jr| j| 	 |	 ksRt| j| j
|j
ksht|jjdksxt| j| | q| || q|r| j  | j| j| | j  n| j| j| | jS )z$Bind input tensors and run inferencecuda)r8   
isinstancer&   rV   rD   rE   r[   r^   r_   Znelementr5   r7   r   Zcopy_rm   rC   Zsynchronize_inputsr   Zrun_with_iobindingZsynchronize_outputsr`   )rb   rs   rt   ru   r   rh   r   r   r   infer3  s    

zCudaSession.inferr   )rl   r^   streamr   c                 C   s$   | d|d}|dkr t ||d< |S )NZkSameAsRequested)rl   Zarena_extend_strategyr^   r   Zuser_compute_stream)r4   )rl   r^   ry   optionsr   r   r   get_cuda_provider_optionsI  s    z%CudaSession.get_cuda_provider_options)F)NT)r   )r0   r1   r2   __doc__r	   r&   r7   rc   r4   rf   rg   rV   rm   r   r   r   intr   rr   r
   r$   rx   r3   r   r{   r   r   r   r   rW      s   &*rW   c                       s   e Zd Zdeejeeee	e
 ee
 f f ee
e
eeeef  d fddZdeedd	d
Zdeeejf ed fddZ  ZS )
GpuBindingFr   N)r   r7   rn   enable_gpu_graphgpu_graph_idry   ra   c           
         sf   t  ||| |r2| D ]\}}	| ||	 q| | || _|rPt|nd | _|| _	d | _
d S re   )superrc   r8   rf   rr   r   copydeepcopyrn   ry   last_run_gpu_graph_id)
rb   r   r7   rn   r   r   ry   ra   rd   rN   	__class__r   r   rc   Y  s    

zGpuBinding.__init__)disable_cuda_graph_in_runr   c                 C   s.   t  }|rdn| j}|dt| || _|S )Nr   r   )r
   r   add_run_config_entryr4   r   )rb   r   rz   r   r   r   r   get_run_optionsp  s
    zGpuBinding.get_run_options)rs   r   c                    s*   |  |}| jr|dd t ||S )NZ'disable_synchronize_execution_providers1)r   ry   r   r   rx   )rb   rs   r   rt   r   r   r   rx   {  s    
zGpuBinding.infer)Fr   r   N)F)F)r0   r1   r2   r	   r&   r7   r   r4   r   r   r}   r   r$   r   rc   r
   r   rV   rx   __classcell__r   r   r   r   r~   X  s       r~   c                   @   sb   e Zd ZdZdeejeedddZde	e
eee ee f f eee	e
e
f  ed	d
dZdS )GpuBindingManagerzA manager for I/O bindings that support multiple CUDA Graphs.
    One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
    r   ri   )r   r7   ry   max_cuda_graphsc                 C   s(   || _ || _g | _d | _|| _|| _d S re   )r   r7   graph_bindingsno_graph_bindingry   r   )rb   r   r7   ry   r   r   r   r   rc     s    zGpuBindingManager.__init__FN)rn   use_cuda_graphra   r   c              	   C   s   | j D ]}|j|kr|  S qt| j | jks2|sj| jd krXt| j| j|| j|d| _n| j	| | jS t| j| j|dt| j | j|d}| j 
| |S )N)ry   ra   T)r   r   ry   ra   )r   rn   rk   r   r   r~   r   r7   ry   rr   rR   )rb   rn   r   ra   Zgpu_graph_bindingr   r   r   get_binding  s2    



    
	zGpuBindingManager.get_binding)r   ri   )FN)r0   r1   r2   r|   r	   r&   r7   r}   rc   r   r4   r   r   r   r$   r   r~   r   r   r   r   r   r     s     r   )r   loggingcollectionsr   typingr   r   r   r   r   r   r   r&   Zonnxruntimer	   r
   	getLoggerr0   rJ   r   r6   rW   r~   r   r   r   r   r   <module>   s    
Uq ,