from abc import ABC, abstractmethod
from typing import Any, List, Optional, Tuple

import torch
import torch.distributed as dist
from torch.distributed._shard.sharded_tensor.api import ShardedTensor
from torch.distributed._shard.sharded_tensor.shard import Shard
from torch.distributed._tensor import DeviceMesh, DTensor
from torch.distributed.fsdp._shard_utils import (
    _all_gather_dtensor,
    _create_chunk_dtensor,
    _create_chunk_sharded_tensor,
)


class FSDPExtensions(ABC):
    """
    This enables some customizable hooks to enable composability with tensor
    parallelism. To activate these hooks, use :func:`_set_fsdp_extensions` to
    set a custom :class:`FSDPExtensions` that implements the hooks.
    """

    @abstractmethod
    def pre_flatten_transform(
        self,
        tensor: torch.Tensor,
    ) -> Tuple[torch.Tensor, Optional[Any]]:
        """E.g. converting ``DistributedTensor`` to local tensor."""
        ...

    @abstractmethod
    def post_unflatten_transform(
        self,
        tensor: torch.Tensor,
        param_extension: Any,
    ) -> torch.Tensor:
        """E.g. converting local tensor to ``DistributedTensor``."""
        ...

    @abstractmethod
    def chunk_tensor(
        self,
        tensor: torch.Tensor,
        rank: int,
        world_size: int,
        num_devices_per_node: int,
        pg: dist.ProcessGroup,
        device: Optional[torch.device] = None,
    ) -> torch.Tensor:
        """Shards a tensor to chunks and returns the local chunk."""
        ...

    @abstractmethod
    def chunk_dtensor(
        self,
        tensor: torch.Tensor,
        rank: int,
        device_mesh: DeviceMesh,
    ) -> torch.Tensor:
        """Shards a tensor/DTensor to DTensor and returns the local DTensor."""
        ...

    @abstractmethod
    def pre_load_state_dict_transform(
        self,
        tensor: torch.Tensor,
    ) -> Tuple[torch.Tensor, List[Shard]]:
        """
        This is to be called before loading a *sharded* model state dict and
        should return the tensor and list of shards from which to load data.
        """
        ...

    @abstractmethod
    def all_gather_dtensor(
        self,
        tensor: DTensor,
        parent_mesh: Optional[DeviceMesh],
    ) -> torch.Tensor:
        """
        This is to be called before loading a *sharded* DTensor state dict.
        This gathers tensor in FSDP dimension and returns local tensor of
        TP DTensor.
        """
        ...


_extensions: Optional[FSDPExtensions] = None


def _set_fsdp_extensions(flattener: FSDPExtensions) -> None:
    global _extensions
    _extensions = flattener


def _ext_pre_flatten_transform(
    tensor: torch.Tensor,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> Tuple[torch.Tensor, Optional[Any]]:
    if fsdp_extension is not None:
        new_tensor, param_extension = fsdp_extension.pre_flatten_transform(tensor)
        if param_extension is not None:
            return new_tensor, param_extension
    return tensor, None


def _ext_post_unflatten_transform(
    tensor: torch.Tensor,
    param_extension: Any,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    if fsdp_extension is not None and param_extension is not None:
        return fsdp_extension.post_unflatten_transform(tensor, param_extension)
    return tensor


def _ext_chunk_tensor(
    tensor: torch.Tensor,
    rank: int,
    world_size: int,
    num_devices_per_node: int,
    pg: dist.ProcessGroup,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    chunk_tensor_fn = (
        fsdp_extension.chunk_tensor
        if fsdp_extension is not None
        else _create_chunk_sharded_tensor
    )
    return chunk_tensor_fn(
        tensor,
        rank,
        world_size,
        num_devices_per_node,
        pg,
    )


def _ext_chunk_dtensor(
    tensor: torch.Tensor,
    rank: int,
    device_mesh: DeviceMesh,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    chunk_dtensor_fn = (
        fsdp_extension.chunk_dtensor
        if fsdp_extension is not None
        else _create_chunk_dtensor
    )
    return chunk_dtensor_fn(
        tensor,
        rank,
        device_mesh,
    )


def _ext_pre_load_state_dict_transform(
    tensor: torch.Tensor,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> Tuple[torch.Tensor, List[Shard]]:
    if fsdp_extension is not None:
        return fsdp_extension.pre_load_state_dict_transform(tensor)

    assert type(tensor) is ShardedTensor
    shards = tensor.local_shards()
    return tensor, shards


def _ext_all_gather_dtensor(
    tensor: DTensor,
    parent_mesh: Optional[DeviceMesh],
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    all_gather_dtensor_fn = (
        fsdp_extension.all_gather_dtensor
        if fsdp_extension is not None
        else _all_gather_dtensor
    )
    return all_gather_dtensor_fn(
        tensor,
        parent_mesh,
    )