U
    Mh-                  
   @   s  U d Z ddlZddlZddlZddlmZmZmZmZm	Z	 ddl
Z
ddlZddlZej Zeolej dkZer|edndZeredd Znedd Zed	d Zed
d Zedd Zedd Zedd Zedd Zedd Zedd Zdd Zedd Zedd Zdd Z dd Z!edd Z"e#e$d< edd Z%e#e$d< edd Z&e#e$d< edd Z'e#e$d < eoe Z(e#e$d!< ed"d Z)e#e$d#< erzddl*Z+e+j, Z-W n, e.k
r Z/ zd$Z-d$ZW 5 dZ/[/X Y nX nd$Z-d$a0d%d& Z1d'd( Z2ej3d)d* Z4ej3dAd,d-Z5dBd.d/Z6d0d1 Z7d2d3 Z8d4d5 Z9d6d7 Z:d8d9 Z;d:d; Z<e; Z=e< Z>d<ej?j@dfd=d>ZAd<ejBej?j@dfd?d@ZCesej rtDdS )Cz>This file is allowed to initialize CUDA context when imported.    N)LazyVal
TEST_NUMBATEST_WITH_ROCM	TEST_CUDA
IS_WINDOWS   zcuda:0c                   C   s   t S N)r    r	   r	   U/var/www/html/venv/lib/python3.8/site-packages/torch/testing/_internal/common_cuda.py<lambda>       r   c                   C   s   t otjjtjdtdS )N      ?device)r   torchbackendscudnnZis_acceptableZtensorCUDA_DEVICEr	   r	   r	   r
   r      r   c                   C   s   t rtjj S dS )Nr   )
TEST_CUDNNr   r   r   versionr	   r	   r	   r
   r      r   c                   C   s   t j ot j dkS )N)      r   cudais_availableZget_device_capabilityr	   r	   r	   r
   r      r   c                   C   s   t j ot j dkS )N)   r   r   r	   r	   r	   r
   r      r   c                   C   s   t j ot j dkS )N)   r   r   r	   r	   r	   r
   r      r   c                   C   s   t j ot j dkS )N)r   r   r   r	   r	   r	   r
   r      r   c                   C   s   t j ot j dkS )N)   r   r   r	   r	   r	   r
   r      r   c                   C   s   t j ot j dkS )N)	   r   r   r	   r	   r	   r
   r       r   c                   C   s   t j ot j dkS )N))r   r   )r   r   r   r	   r	   r	   r
   r   "   r   c                 C   s2   t j sdS t jdj}tjd|}|| kS )NFr   Z/PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE)r   r   r   get_device_propertiesZgcnArchNameosenvironget)Zmatching_archZgcn_arch_namearchr	   r	   r
   evaluate_gfx_arch_exact$   s
    
r$   c                   C   s   t dS )Ngfx90a:sramecc+:xnack-r$   r	   r	   r	   r
   r   +   r   c                   C   s   t dS )Ngfx942:sramecc+:xnack-r&   r	   r	   r	   r
   r   ,   r   c                   C   s&   t rtdptdS tr"t o tS dS )Nr%   r'   F)r   r$   r   r   SM80OrLaterr	   r	   r	   r
   *evaluate_platform_supports_flash_attention.   s
    
r)   c                   C   s    t rtdptdS trdS dS )Nr%   r'   TF)r   r$   r   r	   r	   r	   r
   .evaluate_platform_supports_efficient_attention5   s
    r*   c                   C   s   t  S r   )r)   r	   r	   r	   r
   r   <   r   !PLATFORM_SUPPORTS_FLASH_ATTENTIONc                   C   s   t  S r   )r*   r	   r	   r	   r
   r   =   r   #PLATFORM_SUPPORTS_MEM_EFF_ATTENTIONc                   C   s   t ot otjj S r   )r   r   r   r   r   Zcudnn_sdp_enabledr	   r	   r	   r
   r   ?   s   
!PLATFORM_SUPPORTS_CUDNN_ATTENTIONc                   C   s   t ptS r   )r+   r,   r	   r	   r	   r
   r   B   r   !PLATFORM_SUPPORTS_FUSED_ATTENTIONPLATFORM_SUPPORTS_FUSED_SDPAc                   C   s   t otS r   )r   r(   r	   r	   r	   r
   r   F   r   PLATFORM_SUPPORTS_BF16Fc                  C   s@   t stdts<ttj D ]} tjdd|  d qdad S )Nz?CUDA must be available when calling initialize_cuda_context_rng   zcuda:r   T)r   AssertionError__cuda_ctx_rng_initializedranger   r   device_countrandn)ir	   r	   r
   initialize_cuda_context_rngX   s
    r8   c                   C   sX   t j rt jjd krdS t jt j jdk r6dS tt jjdd dk rTdS dS )NFr   .r      T)	r   r   r   r   r   Zcurrent_devicemajorintsplitr	   r	   r	   r
   tf32_is_not_fp32e   s    r>   c               	   c   sX   t jjjj} z8dt jjj_t jjjd d d dd d V  W 5 Q R X W 5 | t jjj_X d S )NFZenabledZ	benchmarkZdeterministic
allow_tf32)r   r   r   matmulr@   r   flags)old_allow_tf32_matmulr	   r	   r
   tf32_offo   s    rD   h㈵>c              	   c   sj   t jjjj}| j}z>dt jjj_|| _t jjjd d d dd d V  W 5 Q R X W 5 |t jjj_|| _X d S )NTr?   )r   r   r   rA   r@   	precisionr   rB   )selftf32_precisionrC   Zold_precisionr	   r	   r
   tf32_onz   s    rI   c                    s&   dd  fddfdd}|S )Nc              	   S   s   t   |  W 5 Q R X d S r   rD   rG   Zfunction_callr	   r	   r
   with_tf32_disabled   s    z+tf32_on_and_off.<locals>.with_tf32_disabledc              	      s    t |   |  W 5 Q R X d S r   )rI   rK   )rH   r	   r
   with_tf32_enabled   s    z*tf32_on_and_off.<locals>.with_tf32_enabledc                    s8   t j}t|  t fdd}|S )Nc                     s   t | D ]\}}| |< q
t }d krB|o@t d jdk}d krb|o` d tjtjhk}|r d  fdd  d  fdd n
f   d S )Nr   r   dtyperG   c                      s
    f S r   r	   r	   fkwargsr	   r
   r      r   zCtf32_on_and_off.<locals>.wrapper.<locals>.wrapped.<locals>.<lambda>c                      s
    f S r   r	   r	   rO   r	   r
   r      r   )zipr>   r   r   typeZfloat32Z	complex64)argsrQ   kvZcond)	arg_namesrP   rL   rM   )rQ   r
   wrapped   s    
z1tf32_on_and_off.<locals>.wrapper.<locals>.wrapped)inspect	signature
parameterstuplekeys	functoolswraps)rP   paramsrX   )rL   rM   )rW   rP   r
   wrapper   s
    z tf32_on_and_off.<locals>.wrapperr	   )rH   ra   r	   )rH   rL   rM   r
   tf32_on_and_off   s    rb   c                    s   t   fdd}|S )Nc               
      s(   t    | |W  5 Q R  S Q R X d S r   rJ   )rT   rQ   rP   r	   r
   rX      s    zwith_tf32_off.<locals>.wrapped)r^   r_   )rP   rX   r	   rc   r
   with_tf32_off   s    rd   c                  C   s^   dt j krdS t j d} t j | td d  dd }tdd |dD S )	NZMagmar   r   zMagma 
r   c                 s   s   | ]}t |V  qd S r   r<   .0xr	   r	   r
   	<genexpr>   s     z%_get_magma_version.<locals>.<genexpr>r9   )r   Z
__config__showfindlenr=   r\   )positionversion_strr	   r	   r
   _get_magma_version   s
    $rq   c                  C   s4   t jjd krdS tt jj} tdd | dD S )Nre   c                 s   s   | ]}t |V  qd S r   rg   rh   r	   r	   r
   rk      s     z*_get_torch_cuda_version.<locals>.<genexpr>r9   )r   r   r   strr\   r=   )Zcuda_versionr	   r	   r
   _get_torch_cuda_version   s    rs   c                  C   s:   t sdS ttjj} | dd } tdd | dD S )Nre   -r   c                 s   s   | ]}t |V  qd S r   rg   rh   r	   r	   r
   rk      s     z*_get_torch_rocm_version.<locals>.<genexpr>r9   r   rr   r   r   Zhipr=   r\   )rocm_versionr	   r	   r
   _get_torch_rocm_version   s
    rw   c                   C   s   t  S r   )r   r	   r	   r	   r
   !_check_cusparse_generic_available   s    rx   c                  C   sL   t sdS ttjj} | dd } tdd | dD }|d kpH|dk  S )NFrt   r   c                 s   s   | ]}t |V  qd S r   rg   rh   r	   r	   r
   rk      s     z5_check_hipsparse_generic_available.<locals>.<genexpr>r9   )r   r1   ru   )rv   Zrocm_version_tupler	   r	   r
   "_check_hipsparse_generic_available   s    ry   r   c           
   	   C   s   t jt jddt jddj| d}t jt jddt jddj| d}t  , t| | D ]\}}|| qpW 5 Q R X ddi}|d k	r|	| || f|}|| f|}	||||	fS )Nr   r   lrr   )
r   nnZ
SequentialZLineartoZno_gradrR   r[   Zcopy_update)
r   optimizer_ctoroptimizer_kwargsZmod_controlZmod_scalingcsrQ   Zopt_controlZopt_scalingr	   r	   r
   !_create_scaling_models_optimizers   s    **

r   c              	   C   s   t jd|| dt jd|| dft jd|| dt jd|| dft jd|| dt jd|| dft jd|| dt jd|| dfg}t j | }d}t| ||d|||f S )N)r   r   )rN   r   r   )r   r~   r   )r   r6   r{   ZMSELossr|   r   )r   rN   r~   r   dataZloss_fnZ	skip_iterr	   r	   r
   _create_scaling_case  s      r   )rE   )rE   )E__doc__r^   r   Z
torch.cudaZ$torch.testing._internal.common_utilsr   r   r   r   r   rY   
contextlibr    r   Zis_initializedZ"CUDA_ALREADY_INITIALIZED_ON_IMPORTr5   ZTEST_MULTIGPUr   r   r   ZTEST_CUDNN_VERSIONZSM53OrLaterZSM60OrLaterZSM70OrLaterZSM75OrLaterr(   ZSM90OrLaterZ	IS_JETSONr$   ZGFX90A_ExactZGFX942_Exactr)   r*   r+   bool__annotations__r,   r-   r.   r/   r0   Z
numba.cudaZnumbar   ZTEST_NUMBA_CUDA	Exceptioner3   r8   r>   contextmanagerrD   rI   rb   rd   rq   rs   rw   rx   ry   ZTEST_CUSPARSE_GENERICZTEST_HIPSPARSE_GENERICZoptimZSGDr   floatr   r2   r	   r	   r	   r
   <module>   sx   



)
%
