U
    Mh<                     @   s|  d dl mZmZmZmZ d dlZd dlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZ ddgZG dd deZd	e d
e d
e
 d
e d	e_ee ee ee ee ee ee eeeeeeeeedddZee ee ee ee ee ee eeeeeeeeedddZeeddee ee ee ee ee ee ee eeeeeeeeedddZdS )    )ListOptionalTupleUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTASGDasgdc                       s\   e Zd Zdeeeeeeee eeed
 fd	d
Z fddZdd Z	e
dddZ  ZS )r   {Gz?-C6?      ?    .Ar   NF)
paramslrlambdalphat0weight_decayforeachmaximizedifferentiable
capturablec                    sX   d|kst d| d|ks,t d| t||||||||	|
d	}t || d S )Ng        zInvalid learning rate: zInvalid weight_decay value: )	r   r   r   r   r    r!   r"   r#   r$   )
ValueErrordictsuper__init__)selfr   r   r   r   r   r    r!   r"   r#   r$   defaults	__class__ B/var/www/html/venv/lib/python3.8/site-packages/torch/optim/asgd.pyr(      s     zASGD.__init__c                    s   t  | | jD ]}|dd  |dd |dd |dd |d D ]}| j|g }t|dkrNt|d st	|d }tj
|t |jd	|d< t|d
 stj
|d
 t |jd	|d
< t|d sNtj
|d t |jd	|d< qNqd S )Nr!   r"   Fr#   r$   r   r   step)dtypedeviceetamu)r'   __setstate__param_groups
setdefaultstategetlentorchZ	is_tensorfloattensorr   r1   )r)   r7   grouppZp_stateZstep_valr+   r-   r.   r4   :   s8    
  
  
  zASGD.__setstate__c                 C   s
  d}|d D ]}	|	j d k	r|t|	O }||	 |	j jrBtd||	j  | j|	 }
t|
dkrtjd|	j	t
 d|
d< tj|d |	j	t
 d  |
d	< tjd|	j	t
 d|
d
< tj|	tjd|
d< ||
d
  ||
d  ||
d	  ||
d  q|S )NFr   z&ASGD does not support sparse gradientsr   r-   )r1   r0   r/   r   r2   r3   )Zmemory_formatax)gradr:   
is_complexappendZ	is_sparseRuntimeErrorr7   r9   Zzerosr1   r   	as_tensorclonedetachZonesZ
zeros_likeZpreserve_format)r)   r=   params_with_gradgradsmusaxsetasstate_stepshas_complexr>   r7   r-   r-   r.   _init_groupR   sF    


  
    
 
zASGD._init_groupc                 C   s   |    d}|dk	r.t  | }W 5 Q R X | jD ]~}g }g }g }g }g }g }	| |||||||	}
t||||||	|d |d |d |d |d |d |d |d	 |d
 |
d q4|S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r    r!   r"   r#   r$   )
r   r   r   r   r    r!   r"   r#   r$   rM   )Z _cuda_graph_capture_health_checkr:   Zenable_gradr5   rN   r   )r)   closureZlossr=   rG   rH   rI   rJ   rK   rL   rM   r-   r-   r.   r/   v   sP    

      z	ASGD.step)	r   r   r   r   r   NFFF)N)__name__
__module____qualname__r   r;   r   boolr(   r4   rN   r   r/   __classcell__r-   r-   r+   r.   r      s2            $ah  Implements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        lambd (float, optional): decay term (default: 1e-4)
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        zx

    .. _Acceleration of stochastic approximation by averaging:
        https://dl.acm.org/citation.cfm?id=131098

    )r   rH   rJ   rI   rK   rL   r   r   r   r   r    r"   r#   r$   rM   c       	      
   C   s  t | D ]\}}|| }|s"|n| }|| }|| }|| }|| }tj s|rt }|jj|jj  kr|jj  kr|jjkrn n|jj|kstd| dt|rt	|}t	|}t	|}|d7 }|
dkr|j
||
d}|r|d||   |j||dd n*t|}|d||   |j|| d |sX| dkrp|||| n
|| |r||d|| |  |	   |dt|| t|  qt|}t|d|| |  |	  }|| tdtd||  }|| qd S )NUIf capturable=True, params, mus, etas, and state_steps must be on supported devices: .r   r   r   value)	enumerater:   _utilsis_compilingr   r1   typeAssertionErrorrA   Zview_as_realaddZmul_Zaddcmul_r   Zadd_itemsubZcopy_maximumZ	ones_likerD   max)r   rH   rJ   rI   rK   rL   r   r   r   r   r    r"   r#   r$   rM   iparamr@   r3   r?   r2   Zstep_tcapturable_supported_devicesZ	eta_valuer/   Znew_etaZnew_mur-   r-   r.   _single_tensor_asgd   sX    







"
rh   c       	            sL  t | dkrd S |rtdtj sd|rdtddtfddt| |||D sdtd dt	| |||||g}|
 D ]\\}\\}}}}}}}|rt||| |rt|}|d jrtj|tjd	d
dd	d nt|d |
dkr8|rtj|||
d |}ntj|||
d}tj||d ntj||d}tj|||dd ~t||}t||| ~|rt|}t|d	 t| t|| ~t|}t| t|d t|  t| t| t|| q fdd|D }fdd|D }t|| t|| qd S )Nr   z#_foreach ops don't support autogradF)Zsupports_xlac                 3   sT   | ]L\}}}}|j j|j j  ko:|j j  ko:|j jkn  oJ|j j kV  qd S )N)r1   r^   ).0r>   r3   r2   r/   )rg   r-   r.   	<genexpr>'  s   
2z%_multi_tensor_asgd.<locals>.<genexpr>rU   rV   g      ?cpur1   rW   r   rX   rY   c                    s.   g | ]&}t jd  |     dqS r   rl   )r:   rD   ri   r/   )r   r1   r   r   r-   r.   
<listcomp>  s   z&_multi_tensor_asgd.<locals>.<listcomp>c                    s,   g | ]$}t jd td t|   dqS rm   )r:   rD   rd   r   rn   )r1   r   r-   r.   ro     s   )r9   r_   r:   r\   r]   r   allzipr   Z"_group_tensors_by_device_and_dtypeitemsr   Z_foreach_negZis_cpuZ_foreach_add_r<   Z_foreach_addZ_foreach_addcmul_Z_foreach_subZ_foreach_maximum_Z_foreach_reciprocal_Z_foreach_copy_Z_foreach_mulZ_foreach_mul_Z_foreach_pow_)r   rH   rJ   rI   rK   rL   r   r   r   r   r    r"   r#   r$   rM   Zgrouped_tensors_Zgrouped_paramsZgrouped_gradsZgrouped_axsZgrouped_musZgrouped_etasZgrouped_state_stepsZintermediateZnew_musZnew_etasr-   )r   rg   r1   r   r   r   r.   _multi_tensor_asgd  s    



  
    


rt   )Zsingle_tensor_fnF)r   rH   rJ   rI   rK   rL   r!   r"   r#   r$   rM   r   r   r   r   r    c                C   sr   |dkrt | |dd\}}|r0tj r0td|rDtj sDt}nt}|| |||||||||||||	|
d dS )znFunctional API that performs asgd algorithm computation.

    See :class:`~torch.optim.ASGD` for details.
    NF)Z	use_fusedz6torch.jit.script not supported with foreach optimizers)	r   r   r   r   r    r"   r#   r$   rM   )r	   r:   ZjitZis_scriptingrC   rt   rh   )r   rH   rJ   rI   rK   rL   r!   r"   r#   r$   rM   r   r   r   r   r    rs   funcr-   r-   r.   r     s8      
)NFFFF)typingr   r   r   r   r:   r   Z	optimizerr   r	   r
   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__r;   rS   rh   rt   r   r-   r-   r-   r.   <module>   s   < 
M 
     