U
    yh}                     @   s  U d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZ ddlmZ ddgZe sd dlZG dd	 d	Zd
d Zeejd _eejd _nd dlmZmZmZmZmZmZmZmZmZm Z  e !e"Z#erzd dl$m%Z% W n  e&k
r   e#'d Y nX G dd dej(Z)e) Z*e)e+d< de,dddZ-G dd dZdde,ee.df eee,df  edddZdS )    N)DictListOptionalTupleTYPE_CHECKINGUnion)is_available   )not_noneinit_device_mesh
DeviceMeshc                   @   s   e Zd ZdS )_DeviceMeshStubN)__name__
__module____qualname__ r   r   O/var/www/html/venv/lib/python3.8/site-packages/torch/distributed/device_mesh.pyr      s   r   c                   C   s   d S Nr   r   r   r   r   _init_device_mesh_stub   s    r   ztorch.distributed.device_mesh)
_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroup)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc                   @   s   e Zd ZddddZddddZdeedf dd	d
dZded dddZ	dee
 dddZeee
dddZeee
dddZdee
dddZde
eeej ddddZdS )_MeshEnvNreturnc                 C   s   g | _ i | _i | _d S r   )
mesh_stackchild_to_parent_mappingmesh_dim_group_optionsselfr   r   r   __init__?   s    z_MeshEnv.__init__r   c                 C   s    t | jdkrtd| jd S )Nr   z#No device mesh is currently active!)lenr#   RuntimeErrorr&   r   r   r   get_current_meshF   s    z_MeshEnv.get_current_mesh.)parent_meshsubmesh_dim_namesr"   c                    s    fdd|D } fdd|D }t t jj}|D ]}|| q8 jj|| jd| }  }|D ]"}	t j	|	|dd}
||	krn|
}qn |_
 fdd|D |_ | j|< |S )	Nc                    s   g | ]}t  j|qS r   )r
   mesh_dim_namesindex.0mesh_dim_namer-   r   r   
<listcomp>O   s   z._MeshEnv.create_child_mesh.<locals>.<listcomp>c                    s   g | ]} j |qS r   )meshsizer2   mesh_dimr4   r   r   r5   S   s    r)   Fr/   _init_backendc                    s   g | ]} j | qS r   )_dim_group_infosr8   r4   r   r   r5   n   s    )r)   )listranger6   ndimremoveZpermutereshaper   r   device_type_parent_meshr<   r$   )r'   r-   r.   Zsubmesh_dimsZsubmesh_dim_sizesZmesh_dims_remainedZsubmesh_dimpg_ranks_by_dimZcur_rankZmesh_ndsubmeshZres_submeshr   r4   r   create_child_meshK   sB    

  

z_MeshEnv.create_child_mesh)device_meshr"   c                 C   s   | j |d S r   )r$   get)r'   rG   r   r   r   get_parent_meshu   s    z_MeshEnv.get_parent_meshc                 C   sD   |  |}|j}|r@|r@t|dks,td|d }| ||S dS )z
            Return the index of the mesh dim in the parent mesh.
            The device_mesh passed in needs to be sliced out from a parent mesh.
               z%The child mesh can only be a 1D mesh.r   N)rI   r/   r*   AssertionErrorget_mesh_dim_by_name)r'   rG   r-   Zchild_mesh_dim_namesZchild_mesh_dim_namer   r   r   get_parent_mesh_dimx   s    

z_MeshEnv.get_parent_mesh_dim)rB   r"   c                 C   s   t |  S r   )_get_device_handledevice_countrB   r   r   r   num_devices_per_host   s    z_MeshEnv.num_devices_per_hostc                 C   s   t  t|  S r   )r   r    rQ   rP   r   r   r   	num_hosts   s    z_MeshEnv.num_hosts)rG   r3   r"   c                 C   sT   |j d kst|j dkr td||j krDtd| dd|j  t|j |S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)r/   r*   KeyErrorr
   r0   )r'   rG   r3   r   r   r   rL      s    


z_MeshEnv.get_mesh_dim_by_name)dimbackend
pg_optionsr"   c                 C   s   ||f| j |< d S r   )r%   )r'   rT   rU   rV   r   r   r   _set_mesh_dim_group_options   s    z$_MeshEnv._set_mesh_dim_group_options)N)r   r   r   r(   r,   r   strrF   r   rI   intrM   staticmethodrQ   rR   rL   r   OptionsrW   r   r   r   r   r    >   s.    
*  r    _mesh_resourcescudarP   c                 C   s   t t| dS )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchrP   r   r   r   rN      s    rN   c                
   @   s  e Zd ZU dZeed< ejed< ee	edf  ed< dddee
ejd	f ee	edf  edd
ddZdd Zdd Zd dddZddddZedddZdd ZeedddZe
ee	edf f d dddZd5ee
eef  ed d!d"Zee dd#d$Zed6dd%e
eee f eee
ejd	f  ee	edf  d d&d'd(Zd7ee ed d)d*Zeedd+d,Zee	edf dd-d.Z edd/d0Z!d8ee
eef  ed d1d2Z"eee  dd3d4Z#dS )9r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rB   r6   .r/   NTr:   r   )rB   r6   r/   r;   r"   c                C   s   || _ t|tjr,|jjdkr,td| t|tjrJ| jtj	dntj
|dtj	d| _|rjt|nd | _t| j  | _d | _t | _|dkr|r|   |   | jt k }|ddkst|ddkr|d  nd | _d S )Ncpuz!`mesh` must be a CPU tensor, got dtypedevicerb   Zxlar   )r   rJ   )rB   
isinstancer_   Tensorrd   type
ValueErrordetachtorY   tensorr6   tupler/   flattentolist_flatten_mesh_listrC   	threading	get_ident
_thread_id_get_or_create_default_group_init_process_groupsr   Znonzeror7   rK   _coordinate_on_dim)r'   rB   r6   r/   r;   Zrank_coordsr   r   r   r(      s&    

zDeviceMesh.__init__c                 C   s   t  }|st  t }| j |kr:td| j  dt| j}|s|r| }||kr|| dkrtd| d| d| j d|	t
 |  t S )Nz=Mesh should not be bigger than default world size, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r   r   r   r6   numelr+   rN   rB   rO   Z
set_devicer   r   )r'   Zdefault_initializedZ
world_sizeZdevice_handlerQ   r   r   r   rs   	  s(    

z'DeviceMesh._get_or_create_default_groupc           	      C   s  g }| j jdkrF| j  t krF|tt ttt t j	f nt| j jD ]}| j 
d|d| j |}|D ]}| }|tjkrtj| \}}nd\}}t|||d}|  |krxt||krtd| j d| d|tt|||j	f qxqR|| _d S )NrJ   r)   )NN)ZranksrU   rV   zFEach device mesh dimension should get only one process group, but got z in !)r6   r?   rw   r   appendr   r   r=   r>   
group_nameZswapdimsrA   r7   rn   r\   r%   r   r   r*   r+   r
   r<   )	r'   Zdim_group_infosrT   rD   Zdim_meshZsubgroup_ranksrU   rV   Z	dim_groupr   r   r   rt   &  sN    	 


zDeviceMesh._init_process_groupsr!   c                 C   s   t j|  | S r   )r\   r#   ry   r&   r   r   r   	__enter__j  s    zDeviceMesh.__enter__c                 C   s   t j  d S r   )r\   r#   pop)r'   exc_type	exc_valueexc_tracebackr   r   r   __exit__p  s    zDeviceMesh.__exit__c                 C   s6   | j sd| j  dnd| j  d| j  d}|S )NzDeviceMesh()z, mesh_dim_names=)r/   r6   rn   )r'   Zdevice_mesh_reprr   r   r   __repr__t  s
    zDeviceMesh.__repr__c                 C   s>   t | dd | _| js8t| j| jj| j| j| j| j	f| _| jS )N_hash)
r^   r   hashro   r6   shaperB   r/   rC   rr   r&   r   r   r   __hash__|  s    
zDeviceMesh.__hash__)otherr"   c                 C   sr   t |tsdS t| t|kr"dS | j|jkol| jj|jjkol| j|jkol| j|jkol| j|jkol| j	|j	kS d S )NFT)
re   r   idro   r6   r   rB   r/   rC   rr   )r'   r   r   r   r   __eq__  s    




zDeviceMesh.__eq__)r/   r"   c                    s    j stdt|tr|fn|}d| d j  d}| j krD S t|t j kslt fdd|D svt|nF|d } j |}t| j |t| D ]\}}||krt|qt	
 |}|S )a  
            Slice the current DeviceMesh based on the mesh_dim_name given to create a child
            DeviceMesh.

            Args:
                mesh_dim_name (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the parent DeviceMesh to create the child DeviceMesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh["tp"] on rank 0, 1, 2, 3 would return a 1D child DeviceMesh:([0, 1, 2, 3]).
            Calling mesh["tp"] on rank 4, 5, 6, 7 would return a 1D child DeviceMesh:([4, 5, 6, 7]).
            Calling mesh["dp"] on rank 0, 4 would return a 1D child DeviceMesh:([0, 4]).
            Calling mesh["dp"] on rank 1, 5 would return a 1D child DeviceMesh:([1, 5]).
            Calling mesh["dp"] on rank 2, 6 would return a 1D child DeviceMesh:([2, 6]).
            Calling mesh["dp"] on rank 3, 7 would return a 1D child DeviceMesh:([3, 7]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            z1Cannot slice a DeviceMesh without mesh_dim_names!zInvalid mesh_dim_name zG specified. Valid mesh_dim_names should be a contiguous subsequence of .c                 3   s   | ]}| j kV  qd S r   r/   r1   r&   r   r   	<genexpr>  s    z)DeviceMesh.__getitem__.<locals>.<genexpr>r   )r/   r+   re   rX   r*   allrS   r0   zipr\   rF   )r'   r/   	error_msgZoutermost_dim_nameZoutermost_dim_idxijrE   r   r&   r   __getitem__  s,    


zDeviceMesh.__getitem__)r9   r"   c                 C   s   t | dstd| jjdkr>|dkr>td| jj ddd| jjdkrX|dkrXd	}nt|trnt| |n|}tt	| j
| dd
  S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            r<   z*DeviceMesh process groups not initialized!rJ   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r   r	   )hasattrr+   r6   r?   re   rX   r\   rL   r
   r   r<   r'   r9   r   r   r   	get_group  s"    
zDeviceMesh.get_groupc                    s    fddt  jjD S )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            c                    s   g | ]}  |qS r   )r   )r2   r   r&   r   r   r5     s     z-DeviceMesh.get_all_groups.<locals>.<listcomp>)r>   r6   r?   r&   r   r&   r   get_all_groups  s    zDeviceMesh.get_all_groupsr   )grouprB   r6   r/   r"   c                C   s<  t | trt| }t |tjr*| |ks:|dk	rR||krRtdt| d| tj|dtj	d}t
|||dd}t| || jfg|_|S t| }t|dkrtd	|dkrtd
t |tjr| jtj	ddntj|dtj	d}|jt|krtd|  dt| dt
|||dd}dd |D |_|S )aN  
            Contstructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. If more than one group is passed, then the
            ``mesh`` argument is required.
            NzInvalid mesh z for ProcessGroup with ranks r`   rc   Fr:   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroups)rb   rd   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroupsc                 S   s    g | ]}t |t||jfqS r   )r   r   rz   )r2   r   r   r   r   r5   <  s
   z)DeviceMesh.from_group.<locals>.<listcomp>)re   r   r   r_   rf   rn   rh   rX   rk   rY   r   r   rz   r<   r=   r*   ri   rj   r?   )r   rB   r6   r/   Zgroup_ranksrG   groupsr   r   r   
from_group  s\    



   zDeviceMesh.from_groupc                 C   s   |d kr| j  S | j |S r   )r6   rw   r7   r   r   r   r   r7   F  s    zDeviceMesh.sizec                 C   s   | j jS r   )r6   r?   r&   r   r   r   r?   I  s    zDeviceMesh.ndimc                 C   s   t | jjS r   )rl   r6   r   r&   r   r   r   r   M  s    zDeviceMesh.shapec                 C   s   t  S )z:
            Returns the current global rank.
            )r   r&   r   r   r   r   Q  s    zDeviceMesh.get_rankc                 C   sb   | j dkr*|dkr*td| jj  ddn|dkr6d}t| |}t|tsVtdtt|S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            rJ   Nr   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)	r?   r+   r6   r
   r   re   r   rK   r   )r'   r9   Zmesh_dim_groupr   r   r   get_local_rankW  s     zDeviceMesh.get_local_rankc                 C   s   | j r| j S dS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)ru   r&   r   r   r   get_coordinate  s    zDeviceMesh.get_coordinate)N)N)N)N)$r   r   r   __doc__rX   __annotations__r_   rf   r   r   r   boolr(   rs   rt   r{   r   r   r   objectr   r   rY   r   r   r   r   rZ   r   r7   propertyr?   r   r   r   r   r   r   r   r   r      sX   
'
(D?$	 >*r   .)rB   
mesh_shaper/   r"   c             	   C   s   |dk	r\t t|t |kr,tdd| t |t |kr\tddt | dt | d| rz|  sztd|  d	d
td" tjt|tj	d
|}W 5 Q R X t| ||d}|S )a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z4Device type with GPU index is not supported but got z. zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.r`   ra   )rB   r6   r/   )r*   setr+   isalphar_   rd   ZarangemathprodrY   viewr   )rB   r   r/   r6   rG   r   r   r   r     s0    )
&)r]   )/loggingr   rp   typingr   r   r   r   r   r   r_   Ztorch.distributedr   Zutils._typing_utilsr
   __all__sysr   r   modulesr   r   Z"torch.distributed.distributed_c10dr   r   r   r   r   r   r   r   r   r   	getLoggerr   loggerZnumpy.typingr   ImportErrorwarninglocalr    r\   r   rX   rN   rY   r   r   r   r   <module>   sP     0

l	   Z
