# torch/cuda/__init__.py -- readable reconstruction of the compiled (CPython 3.8
# .pyc) module.  Only docstrings, string constants, and clearly visible call
# fragments are recoverable from the bytecode; implementation bodies that could
# not be read back cleanly are elided with ``...``.
r"""
This package adds support for CUDA tensor types.

It implements the same functions as CPU tensors, but they utilize
GPUs for computation.

It is lazily initialized, so you can always import it, and use
:func:`is_available()` to determine if your system supports CUDA.

:ref:`cuda-semantics` has more details about working with CUDA.
"""


import contextlib
import importlib
import os
import sys
import threading
import traceback
import warnings
from functools import lru_cache
from typing import Any, Callable, cast, List, Optional, Tuple, Union

import torch
import torch._C
from torch.types import Device
from .. import device as _device
from .._utils import _dummy_type, _LazySeedTracker, classproperty
from ._utils import _get_device_index
from .graphs import (
    CUDAGraph,
    graph,
    graph_pool_handle,
    is_current_stream_capturing,
    make_graphed_callables,
)
from .streams import Event, ExternalStream, Stream

try:
    from torch._C import _cudart  # type: ignore[attr-defined]
except ImportError:
    _cudart = None

_initialized = False
_tls = threading.local()
_initialization_lock = threading.Lock()
_queued_calls: List[
    Tuple[Callable[[], None], List[str]]
] = []  # don't invoke these until initialization occurs
_is_in_bad_fork = getattr(torch._C, "_cuda_isInBadFork", lambda: False)
_device_t = Union[_device, str, int, None]

_HAS_PYNVML = False
_PYNVML_ERR = None
try:
    try:
        import pynvml  # type: ignore[import]

        _HAS_PYNVML = True
    except ModuleNotFoundError:
        pass
    try:
        import amdsmi  # type: ignore[import]

        _HAS_PYNVML = True
    except ModuleNotFoundError:
        pass
except ImportError as err:
    _PYNVML_ERR = err  # sometimes the library is installed but importing it still fails

_lazy_seed_tracker = _LazySeedTracker()

# Fall back to dummy types / stubs when PyTorch was compiled without CUDA.
if hasattr(torch._C, "_CudaDeviceProperties"):
    _CudaDeviceProperties = torch._C._CudaDeviceProperties
else:
    _CudaDeviceProperties = _dummy_type("_CudaDeviceProperties")  # type: ignore[assignment, misc]

if hasattr(torch._C, "_cuda_exchangeDevice"):
    _exchange_device = torch._C._cuda_exchangeDevice
else:

    def _exchange_device(device: int) -> int:
        if device < 0:
            return -1
        raise RuntimeError("PyTorch was compiled without CUDA support")


if hasattr(torch._C, "_cuda_maybeExchangeDevice"):
    _maybe_exchange_device = torch._C._cuda_maybeExchangeDevice
else:

    def _maybe_exchange_device(device: int) -> int:
        if device < 0:
            return -1
        raise RuntimeError("PyTorch was compiled without CUDA support")


has_half: bool = True
has_magma: bool = torch._C._has_magma

default_generators: Tuple[torch._C.Generator] = ()  # type: ignore[assignment]


def _is_compiled() -> bool:
    r"""Return true if compiled with CUDA support."""
    return hasattr(torch._C, "_cuda_getDeviceCount")


def _nvml_based_avail() -> bool:
    return os.getenv("PYTORCH_NVML_BASED_CUDA_CHECK") == "1"


def is_available() -> bool:
    r"""Return a bool indicating if CUDA is currently available."""
    if not _is_compiled():
        return False
    if _nvml_based_avail():
        # NVML-based check: counts devices without initializing a CUDA context.
        return device_count() > 0
    else:
        # Default check: never raises and returns 0 if the driver is missing or broken.
        return torch._C._cuda_getDeviceCount() > 0


def is_bf16_supported(including_emulation: bool = True):
    r"""Return a bool indicating if the current CUDA/ROCm device supports dtype bfloat16."""
    # bfloat16 is always supported on ROCm.
    if torch.version.hip:
        return True

    device = torch.cuda.current_device()

    # Native support requires CUDA >= 11 and compute capability >= 8.x.
    cuda_version = torch.version.cuda
    if (
        cuda_version is not None
        and int(cuda_version.split(".")[0]) >= 11
        and torch.cuda.get_device_properties(device).major >= 8
    ):
        return True

    if not including_emulation:
        return False

    # Finally, try to create a bfloat16 tensor on the device and see if it works.
    return _check_bf16_tensor_supported(device)


@lru_cache(maxsize=16)
def _check_bf16_tensor_supported(device: _device_t):
    try:
        torch.tensor([1.0], dtype=torch.bfloat16, device=device)
        return True
    except Exception:
        return False


def _sleep(cycles):
    torch._C._cuda_sleep(cycles)


def _extract_arch_version(arch_string: str):
    """Extracts the architecture string from a CUDA version."""
    base = arch_string.split("_")[1]
    if base.endswith("a"):
        base = base[:-1]
    return int(base)


def _check_capability():
    incorrect_binary_warn = """
    Found GPU%d %s which requires CUDA_VERSION >= %d to
     work properly, but your PyTorch was compiled
     with CUDA_VERSION %d. Please install the correct PyTorch binary
     using instructions from https://pytorch.org
    """

    old_gpu_warn = """
    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is %d.%d.
    """

    if torch.version.cuda is not None:  # not relevant for ROCm builds
        CUDA_VERSION = torch._C._cuda_getCompiledVersion()
        for d in range(device_count()):
            capability = get_device_capability(d)
            major = capability[0]
            minor = capability[1]
            name = get_device_name(d)
            current_arch = major * 10 + minor
            min_arch = min(
                (_extract_arch_version(arch) for arch in torch.cuda.get_arch_list()),
                default=35,
            )
            if current_arch < min_arch:
                warnings.warn(
                    old_gpu_warn
                    % (d, name, major, minor, min_arch // 10, min_arch % 10)
                )


def _check_cubins():
    incompatible_device_warn = """
{} with CUDA capability sm_{} is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities {}.
If you want to use the {} GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
"""
    if torch.version.cuda is None:  # not relevant for ROCm builds
        return
    arch_list = get_arch_list()
    if len(arch_list) == 0:
        return
    supported_sm = [_extract_arch_version(arch) for arch in arch_list if "sm_" in arch]
    for idx in range(device_count()):
        cap_major, cap_minor = get_device_capability(idx)
        # Capabilities are backward compatible within the same major version.
        supported = any(sm // 10 == cap_major for sm in supported_sm)
        if not supported:
            device_name = get_device_name(idx)
            capability = cap_major * 10 + cap_minor
            warnings.warn(
                incompatible_device_warn.format(
                    device_name, capability, " ".join(arch_list), device_name
                )
            )


def is_initialized():
    r"""Return whether PyTorch's CUDA state has been initialized."""
    return _initialized and not _is_in_bad_fork()


def _lazy_call(callable, **kwargs):
    if is_initialized():
        callable()
    else:
        global _lazy_seed_tracker
        if kwargs.get("seed_all", False):
            _lazy_seed_tracker.queue_seed_all(callable, traceback.format_stack())
        elif kwargs.get("seed", False):
            _lazy_seed_tracker.queue_seed(callable, traceback.format_stack())
        else:
            # Queue the call (with the stack that requested it) until _lazy_init().
            _queued_calls.append((callable, traceback.format_stack()))


_lazy_call(_check_capability)
_lazy_call(_check_cubins)


class DeferredCudaCallError(Exception):
    pass


OutOfMemoryError = torch._C.OutOfMemoryError


def init():
    r"""Initialize PyTorch's CUDA state.

    You may need to call this explicitly if you are interacting with
    PyTorch via its C API, as Python bindings for CUDA functionality
    will not be available until this initialization takes place.
    Ordinary users should not need this, as all of PyTorch's CUDA methods
    automatically initialize CUDA state on-demand.

    Does nothing if the CUDA state is already initialized.
    """
    _lazy_init()


def _lazy_init():
    global _initialized, _queued_calls
    if is_initialized() or hasattr(_tls, "is_initializing"):
        return
    with _initialization_lock:
        # Double-checked locking: another thread may have initialized first.
        if is_initialized():
            return
        if _is_in_bad_fork():
            raise RuntimeError(
                "Cannot re-initialize CUDA in forked subprocess. To use CUDA with "
                "multiprocessing, you must use the 'spawn' start method"
            )
        if not hasattr(torch._C, "_cuda_getDeviceCount"):
            raise AssertionError("Torch not compiled with CUDA enabled")
        if _cudart is None:
            raise AssertionError(
                "libcudart functions unavailable. It looks like you have a broken build?"
            )
        if "CUDA_MODULE_LOADING" not in os.environ:
            os.environ["CUDA_MODULE_LOADING"] = "LAZY"
        torch._C._cuda_init()
        # Mark initialization as in progress so queued calls that re-enter
        # _lazy_init() return immediately instead of deadlocking.
        _tls.is_initializing = True

        for calls in _lazy_seed_tracker.get_calls():
            if calls:
                _queued_calls.append(calls)

        try:
            for queued_call, orig_traceback in _queued_calls:
                try:
                    queued_call()
                except Exception as e:
                    msg = (
                        f"CUDA call failed lazily at initialization with error: {str(e)}\n\n"
                        f"CUDA call was originally invoked at:\n\n{''.join(orig_traceback)}"
                    )
                    raise DeferredCudaCallError(msg) from e
        finally:
            delattr(_tls, "is_initializing")
        _initialized = True


def cudart():
    _lazy_init()
    return _cudart


class cudaStatus:
    SUCCESS: int = 0
    ERROR_NOT_READY: int = 34


class CudaError(RuntimeError):
    def __init__(self, code: int) -> None:
        msg = _cudart.cudaGetErrorString(_cudart.cudaError(code))
        super().__init__(f"{msg} ({code})")


def check_error(res: int) -> None:
    if res != _cudart.cudaError.success:
        raise CudaError(res)


class _DeviceGuard:
    def __init__(self, index: int):
        self.idx = index
        self.prev_idx = -1

    def __enter__(self):
        self.prev_idx = torch.cuda._exchange_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        self.idx = torch.cuda._maybe_exchange_device(self.prev_idx)
        return False


class device:
    r"""Context-manager that changes the selected device.

    Args:
        device (torch.device or int): device index to select. It's a no-op if
            this argument is a negative integer or ``None``.
    """

    def __init__(self, device: Any):
        self.idx = _get_device_index(device, optional=True)
        self.prev_idx = -1

    def __enter__(self):
        self.prev_idx = torch.cuda._exchange_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        self.idx = torch.cuda._maybe_exchange_device(self.prev_idx)
        return False


class device_of(device):
    r"""Context-manager that changes the current device to that of given object.

    You can use both tensors and storages as arguments. If a given object is
    not allocated on a GPU, this is a no-op.

    Args:
        obj (Tensor or Storage): object allocated on the selected device.
    """

    def __init__(self, obj):
        idx = obj.get_device() if obj.is_cuda else -1
        super().__init__(idx)


def set_device(device: _device_t) -> None:
    r"""Set the current device.

    Usage of this function is discouraged in favor of :any:`device`. In most
    cases it's better to use the ``CUDA_VISIBLE_DEVICES`` environment variable.

    Args:
        device (torch.device or int): selected device. This function is a no-op
            if this argument is negative.
    """
    device = _get_device_index(device)
    if device >= 0:
        torch._C._cuda_setDevice(device)


def get_device_name(device: Optional[_device_t] = None) -> str:
    r"""Get the name of a device.

    Args:
        device (torch.device or int or str, optional): device for which to return the
            name. This function is a no-op if this argument is a negative
            integer. It uses the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Returns:
        str: the name of the device
    """
    return get_device_properties(device).name


def get_device_capability(device: Optional[_device_t] = None) -> Tuple[int, int]:
    r"""Get the cuda capability of a device.

    Args:
        device (torch.device or int or str, optional): device for which to return the
            device capability. This function is a no-op if this argument is
            a negative integer. It uses the current device, given by
            :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).

    Returns:
        tuple(int, int): the major and minor cuda capability of the device
    """
    prop = get_device_properties(device)
    return prop.major, prop.minor


def get_device_properties(device: _device_t) -> _CudaDeviceProperties:
    r"""Get the properties of a device.

    Args:
        device (torch.device or int or str): device for which to return the
            properties of the device.

    Returns:
        _CudaDeviceProperties: the properties of the device
    """
    _lazy_init()  # will define _get_device_properties
    device = _get_device_index(device, optional=True)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    return _get_device_properties(device)  # type: ignore[name-defined]


def can_device_access_peer(device: _device_t, peer_device: _device_t) -> bool:
    r"""Check if peer access between two devices is possible."""
    _lazy_init()
    device = _get_device_index(device, optional=True)
    peer_device = _get_device_index(peer_device)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    if peer_device < 0 or peer_device >= device_count():
        raise AssertionError("Invalid peer device id")
    return torch._C._cuda_canDeviceAccessPeer(device, peer_device)


class StreamContext:
    r"""Context-manager that selects a given stream.

    All CUDA kernels queued within its context will be enqueued on a selected
    stream.

    Args:
        Stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    .. note:: Streams are per-device.
    """

    cur_stream: Optional["torch.cuda.Stream"]

    def __init__(self, stream: Optional["torch.cuda.Stream"]):
        self.stream = stream
        self.idx = _get_device_index(None, True)
        if not torch.jit.is_scripting():
            if self.idx is None:
                self.idx = -1

        self.src_prev_stream = (
            None if not torch.jit.is_scripting() else torch.cuda.default_stream(None)
        )
        self.dst_prev_stream = (
            None if not torch.jit.is_scripting() else torch.cuda.default_stream(None)
        )

    def __enter__(self):
        cur_stream = self.stream
        # No-op if the stream is None or no CUDA device is available.
        if cur_stream is None or self.idx == -1:
            return
        self.src_prev_stream = torch.cuda.current_stream(None)

        # If the chosen stream lives on another device, remember that device's
        # current stream before switching to the chosen stream.
        if self.src_prev_stream.device != cur_stream.device:
            with device(cur_stream.device):
                self.dst_prev_stream = torch.cuda.current_stream(cur_stream.device)
        torch.cuda.set_stream(cur_stream)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        cur_stream = self.stream
        if cur_stream is None or self.idx == -1:
            return

        # Restore the previous streams on the destination and source devices.
        if self.src_prev_stream.device != cur_stream.device:
            torch.cuda.set_stream(self.dst_prev_stream)
        torch.cuda.set_stream(self.src_prev_stream)


def stream(stream: Optional["torch.cuda.Stream"]) -> StreamContext:
    r"""Wrap around the Context-manager StreamContext that selects a given stream.

    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    .. note:: In eager mode stream is of type Stream class while in JIT it is
        an object of the custom class ``torch.classes.cuda.Stream``.
    """
    return StreamContext(stream)


def _set_stream_by_id(stream_id, device_index, device_type):
    r"""Set the stream specified by the stream id, device index and device type.

    Args: stream_id (int): stream id in stream pool
          device_index (int): device index in topo
          device_type (int): enum device type
    """
    torch._C._cuda_setStream(
        stream_id=stream_id,
        device_index=device_index,
        device_type=device_type,
    )


def set_stream(stream: Stream):
    r"""Set the current stream. This is a wrapper API to set the stream.
        Usage of this function is discouraged in favor of the ``stream``
        context manager.

    Args:
        stream (Stream): selected stream. This function is a no-op
            if this argument is ``None``.
    """
    if stream is None:
        return
    _set_stream_by_id(
        stream_id=stream.stream_id,
        device_index=stream.device_index,
        device_type=stream.device_type,
    )


def _parse_visible_devices() -> Union[List[int], List[str]]:
    r"""Parse CUDA_VISIBLE_DEVICES environment variable."""
    # Accepts integer indices as well as "GPU-<uuid>"/"MIG-<uuid>" entries and
    # falls back to HIP_VISIBLE_DEVICES on ROCm; full parsing logic elided.
    ...


def _raw_device_count_amdsmi() -> int:
    # Device count as reported by amdsmi, or -1 if amdsmi can't be initialized.
    ...


def _raw_device_count_nvml() -> int:
    r"""Return number of devices as reported by NVML or negative value if NVML discovery/initialization failed."""
    ...


def _raw_device_uuid_amdsmi() -> Optional[List[str]]:
    # List of device UUIDs as reported by amdsmi, or None on failure.
    ...


def _raw_device_uuid_nvml() -> Optional[List[str]]:
    r"""Return list of device UUID as reported by NVML or None if NVML discovery/initialization failed."""
    ...


def _transform_uuid_to_ordinals(candidates: List[str], uuids: List[str]) -> List[int]:
    r"""Given the set of partial uuids and list of known uuids builds a set of ordinals excluding ambiguous partials IDs."""
    ...


def _device_count_amdsmi() -> int:
    ...


def _device_count_nvml() -> int:
    r"""Return number of devices as reported by NVML taking CUDA_VISIBLE_DEVICES into account.

    Negative value is returned if NVML discovery or initialization has failed.
    """
    ...


def _get_nvml_device_index(device: Optional[Union[int, Device]]) -> int:
    r"""Return the NVML index of the device, taking CUDA_VISIBLE_DEVICES into account."""
    ...


_cached_device_count: Optional[int] = None


def device_count() -> int:
    r"""Return the number of GPUs available."""
    global _cached_device_count
    if not _is_compiled():
        return 0
    if _cached_device_count is not None:
        return _cached_device_count
    nvml_count = _device_count_amdsmi() if torch.version.hip else _device_count_nvml()
    r = torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
    # Don't cache before CUDA is initialized: CUDA_VISIBLE_DEVICES may still change.
    if _initialized:
        _cached_device_count = r
    return r


def get_arch_list() -> List[str]:
    r"""Return the list of CUDA architectures this library was compiled for."""
    if not is_available():
        return []
    arch_flags = torch._C._cuda_getArchFlags()
    if arch_flags is None:
        return []
    return arch_flags.split()


def get_gencode_flags() -> str:
    r"""Return NVCC gencode flags this library was compiled with."""
    arch_list = get_arch_list()
    if len(arch_list) == 0:
        return ""
    arch_list_ = [arch.split("_") for arch in arch_list]
    return " ".join(
        [
            f"-gencode compute=compute_{arch},code={kind}_{arch}"
            for (kind, arch) in arch_list_
        ]
    )


def current_device() -> int:
    r"""Return the index of a currently selected device."""
    _lazy_init()
    return torch._C._cuda_getDevice()


def synchronize(device: _device_t = None) -> None:
    r"""Wait for all kernels in all streams on a CUDA device to complete.

    Args:
        device (torch.device or int, optional): device for which to synchronize.
            It uses the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    _lazy_init()
    with torch.cuda.device(device):
        return torch._C._cuda_synchronize()


def ipc_collect():
    r"""Force collects GPU memory after it has been released by CUDA IPC.

    .. note::
        Checks if any sent CUDA tensors could be cleaned from the memory. Force
        closes shared memory file used for reference counting if there are no
        active counters. Useful when the producer process stopped actively sending
        tensors and you want to release unused memory.
    """
    _lazy_init()
    return torch._C._cuda_ipc_collect()


def current_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Return the currently selected :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the currently selected :class:`Stream` for the current device, given
            by :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    streamdata = torch._C._cuda_getCurrentStream(
        _get_device_index(device, optional=True)
    )
    return Stream(
        stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
    )


def default_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Return the default :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the default :class:`Stream` for the current device, given by
            :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    streamdata = torch._C._cuda_getDefaultStream(
        _get_device_index(device, optional=True)
    )
    return Stream(
        stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
    )


def current_blas_handle():
    r"""Return cublasHandle_t pointer to current cuBLAS handle"""
    _lazy_init()
    return torch._C._cuda_getCurrentBlasHandle()


def set_sync_debug_mode(debug_mode: Union[int, str]) -> None:
    r"""Set the debug mode for cuda synchronizing operations.

    Args:
        debug_mode(str or int): if "default" or 0, don't error or warn on synchronizing operations,
            if "warn" or 1, warn on synchronizing operations, if "error" or 2, error out synchronizing operations.

    Warning:
        This is an experimental feature, and not all synchronizing operations will trigger warning or error. In
        particular, operations in torch.distributed and torch.sparse namespaces are not covered yet.
    """
    _lazy_init()
    if isinstance(debug_mode, str):
        if debug_mode == "default":
            debug_mode = 0
        elif debug_mode == "warn":
            debug_mode = 1
        elif debug_mode == "error":
            debug_mode = 2
        else:
            raise RuntimeError(
                "invalid value of debug_mode, expected one of `default`, `warn`, `error`"
            )
    torch._C._cuda_set_sync_debug_mode(debug_mode)


def get_sync_debug_mode() -> int:
    r"""Return current value of debug mode for cuda synchronizing operations."""
    _lazy_init()
    return torch._C._cuda_get_sync_debug_mode()


def _get_pynvml_handler(device: Optional[Union[Device, int]] = None):
    if not _HAS_PYNVML:
        raise ModuleNotFoundError(
            "pynvml does not seem to be installed or it can't be imported."
        ) from _PYNVML_ERR
    from pynvml import NVMLError_DriverNotLoaded

    try:
        pynvml.nvmlInit()
    except NVMLError_DriverNotLoaded as e:
        raise RuntimeError("cuda driver can't be loaded, is cuda enabled?") from e

    device = _get_nvml_device_index(device)
    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
    return handle


def _get_amdsmi_handler(device: Optional[Union[Device, int]] = None):
    if not _HAS_PYNVML:
        raise ModuleNotFoundError(
            "amdsmi does not seem to be installed or it can't be imported."
        ) from _PYNVML_ERR
    try:
        amdsmi.amdsmi_init()
    except amdsmi.AmdSmiException as e:
        raise RuntimeError(
            "amdsmi driver can't be loaded, requires >=ROCm5.6 installation"
        ) from e
    device = _get_amdsmi_device_index(device)
    handle = amdsmi.amdsmi_get_processor_handles()[device]
    return handle


def _get_amdsmi_device_index(device: Optional[Union[int, Device]]) -> int:
    r"""Return the amdsmi index of the device, taking HIP_VISIBLE_DEVICES into account."""
    idx = _get_device_index(device, optional=True)
    visible_devices = _parse_visible_devices()
    if type(visible_devices[0]) is str:
        raise RuntimeError("HIP_VISIBLE_DEVICES should be indices and not strings")
    idx_map = dict(enumerate(cast(List[int], visible_devices)))
    if idx not in idx_map:
        raise RuntimeError(
            f"device {idx} is not visible (HIP_VISIBLE_DEVICES={visible_devices})"
        )
    return idx_map[idx]


# amdsmi counterparts of the nvidia-smi metrics below; bodies elided.
def _get_amdsmi_memory_usage(device: Optional[Union[Device, int]] = None) -> int: ...
def _get_amdsmi_utilization(device: Optional[Union[Device, int]] = None) -> int: ...
def _get_amdsmi_temperature(device: Optional[Union[Device, int]] = None) -> int: ...
def _get_amdsmi_power_draw(device: Optional[Union[Device, int]] = None) -> int: ...
def _get_amdsmi_clock_rate(device: Optional[Union[Device, int]] = None) -> int: ...


def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the percent of time over the past sample period during which global (device)
    memory was being read or written as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler()
        device = _get_nvml_device_index(device)
        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
        return pynvml.nvmlDeviceGetUtilizationRates(handle).memory
    else:
        return _get_amdsmi_memory_usage(device)


def utilization(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the percent of time over the past sample period during which one or
    more kernels was executing on the GPU as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        device = _get_nvml_device_index(device)
        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
        return pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
    else:
        return _get_amdsmi_utilization(device)


def temperature(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the average temperature of the GPU sensor in Degrees C (Centigrades).

    The average temperature is computed based on past sample period as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        # 0 selects the sensor for the GPU die.
        return pynvml.nvmlDeviceGetTemperature(handle, 0)
    else:
        return _get_amdsmi_temperature(device)


def power_draw(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the average power draw of the GPU sensor in mW (MilliWatts)
        over the past sample period as given by `nvidia-smi` for Fermi or newer fully supported devices.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        return pynvml.nvmlDeviceGetPowerUsage(handle)
    else:
        return _get_amdsmi_power_draw(device)


def clock_rate(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the clock speed of the GPU SM in Hz (Hertz) over the past sample period as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        return pynvml.nvmlDeviceGetClockInfo(handle, 1)
    else:
        return _get_amdsmi_clock_rate(device)


def _get_device(device: Union[int, str, torch.device]) -> torch.device:
    r"""Return the torch.device type object from the passed in device.

    Args:
        device (torch.device or int): selected device.
    """
    if isinstance(device, str):
        device = torch.device(device)
    elif isinstance(device, int):
        device = torch.device("cuda", device)
    return device


def _get_generator(device: torch.device) -> torch._C.Generator:
    r"""Return the CUDA Generator object for the given device.

    Args:
        device (torch.device): selected device.
    """
    idx = device.index
    if idx is None:
        idx = current_device()
    return torch.cuda.default_generators[idx]


def _set_rng_state_offset(
    offset: int, device: Union[int, str, torch.device] = "cuda"
) -> None:
    r"""Set the random number generator state offset of the specified GPU.

    Args:
        offset (int): The desired offset
        device (torch.device or int, optional): The device to set the RNG state.
            Default: ``'cuda'`` (i.e., ``torch.device('cuda')``, the current CUDA device).
    """
    final_device = _get_device(device)

    def cb():
        default_generator = _get_generator(final_device)
        default_generator.set_offset(offset)

    _lazy_call(cb)


def _get_rng_state_offset(device: Union[int, str, torch.device] = "cuda") -> int:
    r"""Return the random number generator state offset of the specified GPU.

    Args:
        device (torch.device or int, optional): The device to return the RNG state offset of.
            Default: ``'cuda'`` (i.e., ``torch.device('cuda')``, the current CUDA device).

    .. warning::
        This function eagerly initializes CUDA.
    """
    _lazy_init()
    final_device = _get_device(device)
    default_generator = _get_generator(final_device)
    return default_generator.get_offset()


from .memory import *  # noqa: F403
from .random import *  # noqa: F403


@staticmethod  # type: ignore[misc]
def _lazy_new(cls, *args, **kwargs):
    _lazy_init()
    return super(_CudaBase, cls).__new__(cls, *args, **kwargs)


class _CudaBase:
    is_cuda = True
    is_sparse = False

    def type(self, *args, **kwargs):
        # Switch to the object's device before dispatching to the base type().
        with device(self.get_device()):
            return super().type(*args, **kwargs)

    __new__ = _lazy_new


from torch.storage import _LegacyStorage, _warn_typed_storage_removal


class _CudaLegacyStorage(_LegacyStorage):
    @classmethod
    def from_buffer(cls, *args, **kwargs):
        _warn_typed_storage_removal()
        raise RuntimeError("from_buffer: Not available for CUDA storage")

    @classmethod
    def _new_with_weak_ptr(cls, *args, **kwargs):
        raise RuntimeError("_new_with_weak_ptr: Not available for CUDA storage")

    @classmethod
    def _new_shared_filename(cls, manager, obj, size, *, device=None, dtype=None):
        raise RuntimeError("_new_shared_filename: Not available for CUDA storage")


class ByteStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.uint8


# DoubleStorage, FloatStorage, HalfStorage, LongStorage, IntStorage, ShortStorage,
# CharStorage, BoolStorage, BFloat16Storage, ComplexDoubleStorage and
# ComplexFloatStorage follow the same pattern as ByteStorage, exposing ``dtype``
# and ``_dtype`` classproperties for torch.double, torch.float, torch.half,
# torch.long, torch.int, torch.short, torch.int8, torch.bool, torch.bfloat16,
# torch.cdouble and torch.cfloat respectively (definitions elided).  Each class
# is registered via ``torch._storage_classes.add(...)``, after which the
# ``_LegacyStorage`` and ``_CudaLegacyStorage`` helpers are deleted.


class _WrappedTritonKernel:
    """Just a simple wrapper to store some metadata for testing purposes."""

    def __init__(self, kernel):
        self.kernel = kernel
        self.kernel_invoked = False

    def __call__(self, *args, **kwargs):
        res = self.kernel(*args, **kwargs)
        self.kernel_invoked = True
        return res


def _register_triton_kernels():
    if torch._running_with_deploy():
        return

    @_WrappedTritonKernel
    def kernel_impl(*args, **kwargs):
        from torch.sparse._triton_ops import bsr_dense_mm

        return bsr_dense_mm(*args, skip_checks=True, **kwargs)

    @_WrappedTritonKernel
    def addmm_kernel_impl(*args, **kwargs):
        from torch.sparse._triton_ops import bsr_dense_addmm

        return bsr_dense_addmm(*args, skip_checks=True, **kwargs)

    has_triton = importlib.util.find_spec("triton") is not None
    if has_triton:
        torch._TritonLibrary.registerOp(
            "_triton_bsr_dense_mm_out",
            "_triton_bsr_dense_mm_out(Tensor bsr, Tensor dense, *, Tensor(a!) out) -> Tensor(a!)",
            kernel_impl,
            "SparseCsrCUDA",
        )
        torch._TritonLibrary.registerOp(
            "_triton_bsr_dense_addmm_out",
            (
                "_triton_bsr_dense_addmm_out(Tensor input, Tensor bsr, Tensor dense,"
                " *, Scalar beta, Scalar alpha, Tensor(a!) out) -> Tensor(a!)"
            ),
            addmm_kernel_impl,
            "SparseCsrCUDA",
        )


_lazy_call(_register_triton_kernels)


from . import amp, jiterator, nvtx, profiler, sparse, tunable

# Public API (entries recovered from the constants stored in the compiled module;
# the original ordering is not preserved).
__all__ = [
    # Tensor type aliases kept for backwards compatibility.
    "BFloat16Tensor", "BoolTensor", "ByteTensor", "CharTensor", "DoubleTensor",
    "FloatTensor", "HalfTensor", "IntTensor", "LongTensor", "ShortTensor",
    # Classes and submodules.
    "CUDAGraph", "CudaError", "DeferredCudaCallError", "Event", "ExternalStream",
    "OutOfMemoryError", "Stream", "StreamContext", "amp", "graphs", "jiterator",
    "nccl", "nvtx", "profiler", "random", "sparse", "streams",
    # Functions defined or re-exported in this module.
    "caching_allocator_alloc", "caching_allocator_delete", "can_device_access_peer",
    "check_error", "clock_rate", "cudaStatus", "cudart", "current_blas_handle",
    "current_device", "current_stream", "default_generators", "default_stream",
    "device", "device_count", "device_of", "empty_cache", "get_allocator_backend",
    "CUDAPluggableAllocator", "change_current_allocator", "get_arch_list",
    "get_device_capability", "get_device_name", "get_device_properties",
    "get_gencode_flags", "get_rng_state", "get_rng_state_all", "get_sync_debug_mode",
    "graph", "graph_pool_handle", "has_half", "has_magma", "init", "initial_seed",
    "ipc_collect", "is_available", "is_bf16_supported", "is_current_stream_capturing",
    "is_initialized", "list_gpu_processes", "make_graphed_callables", "manual_seed",
    "manual_seed_all", "max_memory_allocated", "max_memory_cached",
    "max_memory_reserved", "mem_get_info", "memory", "memory_allocated",
    "memory_cached", "memory_reserved", "memory_snapshot", "memory_stats",
    "memory_stats_as_nested_dict", "memory_summary", "memory_usage", "power_draw",
    "reset_accumulated_memory_stats", "reset_max_memory_allocated",
    "reset_max_memory_cached", "reset_peak_memory_stats", "seed", "seed_all",
    "set_device", "set_per_process_memory_fraction", "set_rng_state",
    "set_rng_state_all", "set_stream", "set_sync_debug_mode", "stream",
    "synchronize", "temperature", "utilization",
]