U
    yh$                     @   s  d dl mZmZ d dlZd dlm  mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZmZ dd ZdddZejdddZejdddZdd Z dd Z!dd Z"dd Z#e" Z$e# Z%dd Z&dS )    )OptionalTupleN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtypec                 C   s&   t d| j d| j d| j dd S )Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)device r   H/var/www/html/venv/lib/python3.8/site-packages/torch/_prims/rng_prims.pythrow_on_non_cuda   s    r   c           
      C   s|   t jjd|  |d|d}|| tt jjj| }|j}|rB||_	||fD ],}	||	_
t jjj|	_| | |	_||	_||	_qJd S )Nz
rngprims::r   )Zmutates_argsschema)torchlibraryZ	custom_opZregister_fakegetattrZ_opsZopsZrngprimsdefaultZ_tags__doc__Z_prims_commonZRETURN_TYPEZNEWreturn_typer   	impl_atenZprim_meta_impl)
namer   r   	impl_metadoctagsZrngprim_defZprim_packetZprimpr   r   r   register_rng_prim   s"       

r#   shapec                 C   s   t tjdtjdS )Nr   dtype)r   Z
TensorLiker   Ztensorint64r$   r   r   r   philox_rand_offset_meta3   s    r)   c                 C   s   d}| D ]}||9 }qt j|t jd}d}d}d}t jt j }|j| }|| d | }	t|	|j| }	|d ||	 |  d | }
|
S )N   r&         )	r   Zscalar_tensorr(   cudaZget_device_propertiesZcurrent_deviceZmax_threads_per_multi_processorminZmulti_processor_count)r%   Znumel_scalarZdim_sizeZnumel
block_sizeZunrollZcurand4_engine_callsZdevice_propertyZblocks_per_smZ	grid_sizeoffsetr   r   r   philox_rand_offset9   s    

r1   c                  C   sz   d} d}t jt jt jtttdf  ttddd}t jt jt jtttdf  ttddd}t| |||d	t j	j
fd
 d S )NZphilox_randz{(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor).)r%   seedr0   strider   r'   c                 S   s6   |d kst t| }tj| |||d}t| }||fS )N)r%   stridesr'   r   )AssertionErrorr	   r   Z
TensorMetar)   )r%   r2   r0   r3   r   r'   random_valuesr   r   r   _philox_rand_metaU   s    	   z/register_philox_rand.<locals>._philox_rand_metac              	   S   st   |d kst |jdkrg }n|g}|jdkr4t|tj|" t|| tj| ||d}W 5 Q R X |t	| fS )Ncpur-   )r   r'   )
r5   r   r   r   randomZfork_rngr   Zset_torch_state_tensorZrandr1   )r%   r2   r0   r3   r   r'   devicesr6   r   r   r   _philox_randf   s    	

z*register_philox_rand.<locals>._philox_randz$Philox based stateless rand operator)r   r   r   r   r    r!   )r   SizeTensorr   r   intr   r   r#   TagZnondeterministic_seeded)r   r   r7   r;   r   r   r   register_philox_randQ   s0    r@   c                 C   sl   | dr.| d}t|tr(t|}|jS dd | D }tdd |D rRdS tdd |D rhdS d S )	Nr   c                 S   s    h | ]}t |tjr|jjqS r   )
isinstancer   r=   r   r   ).0argr   r   r   	<setcomp>   s      zget_device.<locals>.<setcomp>c                 s   s   | ]}|d kV  qdS )r-   Nr   rB   devr   r   r   	<genexpr>   s     zget_device.<locals>.<genexpr>r-   c                 s   s   | ]}|d kV  qdS )r8   Nr   rE   r   r   r   rG      s     r8   )getrA   strr   r   r   any)argskwargsr   r:   r   r   r   
get_device   s    



rM   c                     s   t dtjtdd tjdd tjdd tjfdd	 t fd
d} t	 fdd}S )Nrun_and_save_rng_stateTZdeferred_errorc                 _   s   t j | ||fS N)r   r-   get_rng_stateoprK   rL   r   r   r   	impl_cuda   s    z5register_run_and_save_rng_state_op.<locals>.impl_cudac                 _   s   t  | ||fS rP   )r   rQ   rR   r   r   r   impl_cpu   s    z4register_run_and_save_rng_state_op.<locals>.impl_cpuc                    sB    d}t ||}||ks*td| || }|| f||S N)r-   r8   zBackend not supported for rM   r5   )rS   rK   rL   impl_mapr   implrU   rT   r   r   impl_backend_select   s
    

z?register_run_and_save_rng_state_op.<locals>.impl_backend_selectc              
      s,   |   |f||W  5 Q R  S Q R X d S rP   r   )moderS   rK   rL   )r[   r   r   impl_fake_tensor_mode   s    zAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_modec                    st   | j r` |f||}t| jj|f|}t| jj|}| jd||}t||d | jdS |f||S d S NZcall_function)Zconstanttracer)enable_tracingpytreetree_mapr_   unwrap_proxycreate_proxyr   )r\   rS   rK   rL   out
proxy_argsproxy_kwargs	out_proxy)r[   rN   r   r   impl_proxy_dispatch_mode   s       zDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode)
r   py_implr   Autogradr   CUDACPUBackendSelectr
   r   )r]   ri   r   )r[   rU   rT   rN   r   "register_run_and_save_rng_state_op   s    






ro   c                     s   t dtjtdd tjdd tjdd  tfdd	} tj fd
d}t	dd }S )Nrun_with_rng_stateTrO   c                 _   s4   t j }t j|   |||}t j| |S rP   )r   r-   rQ   set_rng_stater8   	rng_staterS   rK   rL   Zcurrent_statere   r   r   r   rT      s
    

z1register_run_with_rng_state_op.<locals>.impl_cudac                 _   s*   t  }t |  |||}t | |S rP   )r   rQ   rq   rr   r   r   r   rU      s
    


z0register_run_with_rng_state_op.<locals>.impl_cpuc           	   	      s   | j rvt   ||f||}W 5 Q R X t| jj||f|}t| jj|}| jd ||}t||d | jdS  ||f||S d S r^   )r`   r   ra   rb   r_   rc   rd   r   )	r\   rs   rS   rK   rL   re   rf   rg   rh   )rp   r   r   ri      s      
   z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec                    sD    d}t ||}||ks*td| || }|| |f||S rV   rW   )rs   rS   rK   rL   rX   r   rY   rZ   r   r   r[      s
    

z;register_run_with_rng_state_op.<locals>.impl_backend_selectc              
   _   s&   |  |||W  5 Q R  S Q R X d S rP   r   )r\   rs   rS   rK   rL   r   r   r   r]      s    z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_mode)
r   rj   r   rk   r   rl   rm   r   rn   r
   )ri   r[   r]   r   )rU   rT   rp   r   register_run_with_rng_state_op   s    







rt   c                   C   s
   t   d S rP   )r@   r   r   r   r   register_rng_prims  s    ru   )N)'typingr   r   r   Ztorch.utils._pytreeutilsZ_pytreera   r   Ztorch._Cr   Ztorch._higher_order_ops.utilsr   Z
torch._opsr   Ztorch._prims_commonr   r	   Ztorch._subclasses.fake_tensorr
   Z"torch.fx.experimental.proxy_tensorr   r   r   Ztorch.typesr   r   r   r#   r<   r)   r1   r@   rM   ro   rt   rN   rp   ru   r   r   r   r   <module>   s.   
7-9