from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
import torch.distributed as dist
import torch.utils._pytree as pytree
from torch._subclasses import FakeTensorMode
from torch.distributed._spmd.data_parallel import (
    DataParallelStyle,
    partition_data_parallel,
)
from torch.distributed._spmd.distribute import _convert_to_distributed, Schema
from torch.distributed._tensor import DeviceMesh, Placement, Replicate, Shard
from torch.fx import GraphModule


class ParallelMode(ABC):
    """
    Basic Parallel Mode interface. Each parallelism pattern should implement
    this interface to describe how to partition and compile the graph in the
    spmd compiler.
    """

    @abstractmethod
    def partition(
        self,
        gm: GraphModule,
        model: torch.nn.Module,
        optimizer: Optional[torch.optim.Optimizer],
        params_and_buffers: Dict[str, Any],
        named_states: Dict[str, Any],
        args: Tuple[Any, ...],
        kwargs: Dict[str, Any],
    ) -> GraphModule:
        """
        Partition a single-device graph into a distributed graph.

        TODO(@wanchaol): some of these arguments are not necessary for
        partitioning, remove the unnecessary ones later.
        """
        raise NotImplementedError()

    @abstractmethod
    def transform_and_compile(self, gm: GraphModule) -> GraphModule:
        """
        Transform and compile a distributed graph with a set of graph
        transformation and optimization passes for each parallel mode.

        The returned result should be a compiled executable graph in
        the distributed environment.
        """
        raise NotImplementedError()


class DataParallel(ParallelMode):
    """Data Parallelism mode."""

    def __init__(
        self,
        parallel_style: str = "replicate",
        *,
        input_batch_dim: int = 0,
        custom_passes: Optional[Callable[[GraphModule], GraphModule]] = None,
    ):
        """
        DataParallel Mode that partitions the model and graph into data parallel
        style parallelism (i.e. DDP/FSDP/ZERO-3). It currently supports three
        different parallel styles: "replicate", "fully_shard", and "default".
        See :class:`DataParallelStyle` for more details.

        Args:
            parallel_style (str): parallel style to use. Currently supports
                "replicate", "fully_shard", and "default".

        Keyword args:
            input_batch_dim (int): the batch dimension of the input tensor.
                default: 0
            custom_passes (Callable[[GraphModule], GraphModule], optional):
                A custom callable that overrides the default graph transformation
                and optimization passes.
        """
        if parallel_style == "replicate":
            self.parallel_style = DataParallelStyle.REPLICATE
        elif parallel_style == "fully_shard":
            self.parallel_style = DataParallelStyle.FULLY_SHARD
        elif parallel_style == "default":
            self.parallel_style = DataParallelStyle.DEFAULT
        else:
            raise RuntimeError(f"Unknown parallel style: {parallel_style}")

        self.input_batch_dim = input_batch_dim

        if custom_passes is not None:
            self._gm_passes: Callable[[GraphModule], GraphModule] = custom_passes
        else:
            # No custom passes provided: default to an identity transformation.
            self._gm_passes = lambda gm: gm

    def partition(
        self,
        gm: GraphModule,
        model: torch.nn.Module,
        optimizer: Optional[torch.optim.Optimizer],
        params_and_buffers: Dict[str, Any],
        named_states: Dict[str, Any],
        args: Tuple[Any, ...],
        kwargs: Dict[str, Any],
    ) -> GraphModule:
        # Build a 1-D device mesh over all ranks on the (hard-coded) cuda device.
        mesh = DeviceMesh("cuda", torch.arange(dist.get_world_size()))

        gm = partition_data_parallel(
            gm,
            model,
            optimizer,
            params_and_buffers,
            named_states,
            args,
            kwargs,
            mesh,
            self.parallel_style,
            self.input_batch_dim,
        )
        return gm

    def transform_and_compile(self, gm: GraphModule) -> GraphModule:
        """Optimize a distributed graph with a set of optimization passes."""
        return self._gm_passes(gm)

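# Illustrative sketch (not part of the original module): how a caller might
# supply ``custom_passes`` to DataParallel. ``_example_debug_pass`` is a
# hypothetical pass that only inspects the partitioned graph and recompiles
# it; a real pass would rewrite ``gm.graph`` before recompiling.
def _example_debug_pass(gm: GraphModule) -> GraphModule:
    # Print each node so the data-parallel expansion can be inspected.
    for node in gm.graph.nodes:
        print(node.op, node.target)
    # Keep the generated forward code in sync with the (unchanged) graph.
    gm.recompile()
    return gm


# Usage sketch:
#   DataParallel("fully_shard", input_batch_dim=0, custom_passes=_example_debug_pass)
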
class DTensorExpandMode(ParallelMode):
    """
    The DTensor Expand mode. It replicates the parameters and shards the
    inputs to represent DDP-like behavior; it is currently a transient mode
    before we move to the new data parallel expansion.
    """

    def __init__(
        self, custom_passes: Optional[Callable[[GraphModule], GraphModule]] = None
    ):
        self._placements_override: Dict[int, List[Placement]] = {}
        if custom_passes is not None:
            self._gm_passes: Callable[[GraphModule], GraphModule] = custom_passes
        else:
            # No custom passes provided: default to an identity transformation.
            self._gm_passes = lambda gm: gm

    def partition(
        self,
        gm: GraphModule,
        model: torch.nn.Module,
        optimizer: Optional[torch.optim.Optimizer],
        params_and_buffers: Dict[str, Any],
        named_states: Dict[str, Any],
        args: Tuple[Any, ...],
        kwargs: Dict[str, Any],
    ) -> GraphModule:
        flat_args = pytree.arg_tree_leaves(*args, **kwargs)

        mesh = DeviceMesh("cuda", torch.arange(dist.get_world_size()).cuda())
        shard_schema: Schema = Schema(mesh=mesh, placements=[Shard(0)])
        replicate_schema: Schema = Schema(mesh=mesh, placements=[Replicate()])

        inps, schemas = [], []

        # Parameters and buffers are always replicated across the mesh.
        for p in pytree.tree_leaves(params_and_buffers):
            assert isinstance(p, torch.Tensor), f"expecting Tensor but got {type(p)}"
            inps.append(p)
            schemas.append(replicate_schema)

        # Optimizer states are replicated as well; non-tensor states get a
        # dummy tensor so that inputs stay aligned with graph placeholders.
        for o in pytree.tree_leaves(named_states):
            if isinstance(o, torch.Tensor):
                inps.append(o)
                schemas.append(replicate_schema)
            else:
                inps.append(torch.empty(0))
                schemas.append(replicate_schema)

        # Actual inputs are sharded on the batch dimension unless an explicit
        # placement override was registered for that tensor.
        for a in flat_args:
            if isinstance(a, torch.Tensor):
                inps.append(a)
                if id(a) in self._placements_override:
                    schemas.append(
                        Schema(mesh=mesh, placements=self._placements_override[id(a)])
                    )
                else:
                    schemas.append(shard_schema)
            else:
                # Create a dummy tensor and schema for non-tensor inputs so
                # that tensor inputs still match their placeholders.
                inps.append(torch.empty(0))
                schemas.append(shard_schema)

        with FakeTensorMode(allow_non_fake_inputs=True):
            fake_inps = [torch.empty_like(inp) for inp in inps]

        return _convert_to_distributed(
            gm, fake_inps, schemas, default_mesh=mesh, _allow_partial=False
        )[0]

    def transform_and_compile(self, gm: GraphModule) -> GraphModule:
        """
        Transform and compile a distributed graph with a set of graph transformation
        and optimization passes for the dtensor fallback parallel mode.
        """
        return self._gm_passes(gm)
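

# Illustrative sketch (not part of the original module): driving
# DTensorExpandMode by hand. It assumes torch.distributed is already
# initialized with CUDA devices and that ``train_step_gm`` was traced from a
# train step by the _spmd frontend; ``_example_expand_and_compile`` and its
# arguments are hypothetical names used only for this sketch.
def _example_expand_and_compile(
    train_step_gm: GraphModule,
    model: torch.nn.Module,
    optimizer: Optional[torch.optim.Optimizer],
    params_and_buffers: Dict[str, Any],
    named_states: Dict[str, Any],
    batch: torch.Tensor,
) -> GraphModule:
    mode = DTensorExpandMode()
    # Keep the batch replicated instead of sharded on dim 0 by keying the
    # placement override on the tensor's id(), as ``partition`` does.
    mode._placements_override[id(batch)] = [Replicate()]

    distributed_gm = mode.partition(
        train_step_gm,
        model,
        optimizer,
        params_and_buffers,
        named_states,
        (batch,),
        {},
    )
    # Apply the (default or custom) graph passes to the distributed graph.
    return mode.transform_and_compile(distributed_gm)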