from typing import Callable, Iterable, Optional, Union

from typing_extensions import deprecated

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.distributed._composable.contract import contract
from torch.distributed._composable_state import _get_module_state, _insert_module_state
from torch.distributed.fsdp._common_utils import _FSDPState
from torch.distributed.fsdp._dynamo_utils import _annotate_modules_for_dynamo
from torch.distributed.fsdp._init_utils import (
    _init_buffer_state,
    _init_core_state,
    _init_device_handle,
    _init_ignored_module_states,
    _init_param_handle_from_module,
    _init_prefetching_state,
    _init_process_group_state,
    _init_runtime_state,
    _init_state_dict_state,
    HYBRID_SHARDING_STRATEGIES,
)
from torch.distributed.fsdp._runtime_utils import (
    _register_post_forward_hook,
    _register_pre_forward_hook,
    _register_root_pre_forward_hook,
)
from torch.distributed.fsdp._state_dict_utils import _register_all_state_dict_hooks
from torch.distributed.fsdp._wrap_utils import _auto_wrap
from torch.distributed.fsdp.api import (
    BackwardPrefetch,
    CPUOffload,
    MixedPrecision,
    ShardingStrategy,
)
from torch.distributed.fsdp.wrap import _Policy


@contract(state_cls=_FSDPState)
@deprecated(
    "`torch.distributed._composable.fully_shard` is being deprecated. "
    "You can continue to use the wrapper based FSDP. "
    "See usage in: https://github.com/pytorch/pytorch/blob/main/torch/distributed/fsdp/fully_sharded_data_parallel.py. "
    "`torch.distributed._composable.fully_shard` will be removed after PyTorch 2.5.",
    category=FutureWarning,
)
def fully_shard(
    module: nn.Module,
    *,
    process_group: Optional[dist.ProcessGroup] = None,
    policy: Optional[_Policy] = None,
    strategy: Optional[ShardingStrategy] = None,
    mixed_precision: Optional[MixedPrecision] = None,
    cpu_offload: Optional[CPUOffload] = None,
    ignored_modules: Optional[Iterable[torch.nn.Module]] = None,
    device_id: Optional[Union[int, torch.device]] = None,
    param_init_fn: Optional[Callable[[nn.Module], None]] = None,
    sync_module_states: bool = False,
    forward_prefetch: bool = False,
    ignored_states: Union[
        Optional[Iterable[torch.nn.Parameter]], Optional[Iterable[torch.nn.Module]]
    ] = None,
) -> nn.Module:
    """Applies ``FullyShardedDataParallel`` (FSDP) semantics to ``module``."""
    torch._C._log_api_usage_once("torch.distributed.fully_shard")
    # Only the newer `_Policy`-based auto-wrap policies are accepted here
    if policy is not None and not isinstance(policy, _Policy):
        raise ValueError(f"Expects a `_Policy` but got {policy}")
    state = fully_shard.state(module)
    state = _init_ignored_module_states(state, module, ignored_modules, ignored_states)
    state = _init_device_handle(state, module, state._ignored_params, device_id)
    _annotate_modules_for_dynamo(module, state._ignored_modules, True)
    state = _init_process_group_state(state, process_group, strategy, policy)
    if policy is not None:
        # Forward the root module's FSDP kwargs to every auto-wrapped submodule
        root_kwargs = {
            "process_group": process_group,
            "strategy": strategy,
            "mixed_precision": mixed_precision,
            "cpu_offload": cpu_offload,
            "ignored_modules": ignored_modules,
            "device_id": device_id,
            "param_init_fn": param_init_fn,
            "sync_module_states": sync_module_states,
            "forward_prefetch": forward_prefetch,
            "ignored_states": ignored_states,
        }
        if strategy in HYBRID_SHARDING_STRATEGIES:
            root_kwargs["process_group"] = (state.process_group, state._inter_node_pg)
        _auto_wrap(
            module,
            policy,
            state._ignored_modules,
            state._ignored_params,
            root_kwargs,
            fully_shard,
        )
    state = _init_core_state(
        state,
        strategy or ShardingStrategy.FULL_SHARD,
        mixed_precision,
        cpu_offload,
        limit_all_gathers=True,
        use_orig_params=True,
        backward_prefetch_limit=1,
        forward_prefetch_limit=1,
    )
    state = _init_runtime_state(state)
    state = _init_prefetching_state(
        state, BackwardPrefetch.BACKWARD_PRE, forward_prefetch=forward_prefetch
    )
    state = _init_buffer_state(state, module)
    state = _init_param_handle_from_module(
        state, module, device_id, param_init_fn, sync_module_states
    )
    state = _init_state_dict_state(state)
    _register_all_state_dict_hooks(state)
    _register_pre_forward_hook(state, module)
    _register_post_forward_hook(state, module)
    _register_root_pre_forward_hook(state, module)
    # Insert the state for the root module and for every fully sharded
    # submodule that does not already have one
    _insert_module_state(module, state)
    for submodule in module.modules():
        if (
            submodule in state._fully_sharded_module_to_handle
            and _get_module_state(submodule) is None
        ):
            _insert_module_state(submodule, state)
    return module
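# ---------------------------------------------------------------------------
# Minimal usage sketch (added for illustration; not part of the original
# module). It assumes a default process group has already been initialized
# via ``torch.distributed.init_process_group`` under a multi-process launcher
# such as ``torchrun``, and uses ``ModuleWrapPolicy`` from
# ``torch.distributed.fsdp.wrap`` to pick which submodules get their own
# FSDP state:
#
#     import torch
#     import torch.nn as nn
#     from torch.distributed.fsdp.wrap import ModuleWrapPolicy
#
#     model = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 8)).cuda()
#     fully_shard(model, policy=ModuleWrapPolicy({nn.Linear}))
#     loss = model(torch.randn(4, 8, device="cuda")).sum()
#     loss.backward()
# ---------------------------------------------------------------------------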