U
    Mh8K                     @   sz  d dl mZmZ d dlZd dlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZ ddgZG dd deZd	d
e
 de de de d	 e_ee ee ee ee ee ee eeeeeeeeeedddZee ee ee ee ee ee eeeeeeeeeedddZe	eddee ee ee ee ee ee ee eeeeeeeeeedddZdS )    )ListOptionalN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype_maximize_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTRMSproprmspropc                       sZ   e Zd Zdeeeeeeee eed	 fdd	Z fd
dZdd Z	e
dddZ  ZS )r   {Gz?Gz?:0yE>r   FN)	paramslralphaepsweight_decaymomentumforeachmaximizedifferentiablec                    s   d|kst d| d|ks,t d| d|ksBt d| d|ksXt d| d|ksnt d| t||||||||	|
|d
}t || d S )Ng        zInvalid learning rate: zInvalid epsilon value: zInvalid momentum value: zInvalid weight_decay value: zInvalid alpha value: )
r   r   r   r   centeredr   
capturabler   r   r   )
ValueErrordictsuper__init__)selfr   r   r   r   r   r   r    r!   r   r   r   defaults	__class__ E/var/www/html/venv/lib/python3.8/site-packages/torch/optim/rmsprop.pyr%      s.    zRMSprop.__init__c                    s   t  | | jD ]}|dd |dd |dd  |dd |dd |dd |d	 D ]h}| j|g }t|dkrft|d
 sft	|d
 }|d rtj
|t |jdntj
|t d|d
< qfqd S )Nr   r   r    Fr   r   r   r!   r   stepdtypedevicer.   )r$   __setstate__param_groups
setdefaultstategetlentorchZ	is_tensorfloattensorr   r/   )r&   r4   grouppZp_stateZstep_valr(   r*   r+   r1   @   s(    
  zRMSprop.__setstate__c                 C   s<  d}|d D ](}	|	j d krq|t|	O }||	 |	j jrFtd||	j  | j|	 }
t|
dkr|d rtjdt	 |	j
dntjdt	 d|
d	< tj|	tjd
|
d< |d dkrtj|	tjd
|
d< |d rtj|	tjd
|
d< ||
d  ||
d	  |d dkr ||
d  |d r||
d  q|S )NFr   z)RMSprop does not support sparse gradientsr   r!   r*   r-   r0   r,   )Zmemory_format
square_avgr   Zmomentum_bufferr    grad_avg)gradr7   
is_complexappendZ	is_sparseRuntimeErrorr4   r6   Zzerosr   r/   Z
zeros_likeZpreserve_format)r&   r:   params_with_gradgradssquare_avgsmomentum_buffer_list	grad_avgsstate_stepshas_complexr;   r4   r*   r*   r+   _init_groupU   sH    



 
 
 
zRMSprop._init_groupc                 C   s   |    d}|dk	r.t  | }W 5 Q R X | jD ]}g }g }g }g }g }g }	| |||||||	}
t||||||	|d |d |d |d |d |d |d |d	 |d
 |d |
d q4|S )zPerforms a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   r    r   r   r   r!   )r   r   r   r   r   r    r   r   r   r!   rH   )Z _cuda_graph_capture_health_checkr7   Zenable_gradr2   rI   r   )r&   closureZlossr:   rB   rC   rD   rF   rE   rG   rH   r*   r*   r+   r,      sR    


zRMSprop.step)
r   r   r   r   r   FFNFF)N)__name__
__module____qualname__r   r8   r   boolr%   r1   rI   r   r,   __classcell__r*   r*   r(   r+   r      s2             '3a  Implements RMSprop algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \alpha \text{ (alpha)},\: \gamma \text{ (lr)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm}   \lambda \text{ (weight decay)},\: \mu \text{ (momentum)},\: centered\\
            &\textbf{initialize} : v_0 \leftarrow 0 \text{ (square average)}, \:
                \textbf{b}_0 \leftarrow 0 \text{ (buffer)}, \: g^{ave}_0 \leftarrow 0     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}v_t           \leftarrow   \alpha v_{t-1} + (1 - \alpha) g^2_t
                \hspace{8mm}                                                                     \\
            &\hspace{5mm} \tilde{v_t} \leftarrow v_t                                             \\
            &\hspace{5mm}if \: centered                                                          \\
            &\hspace{10mm} g^{ave}_t \leftarrow g^{ave}_{t-1} \alpha + (1-\alpha) g_t            \\
            &\hspace{10mm} \tilde{v_t} \leftarrow \tilde{v_t} -  \big(g^{ave}_{t} \big)^2        \\
            &\hspace{5mm}if \: \mu > 0                                                           \\
            &\hspace{10mm} \textbf{b}_t\leftarrow \mu \textbf{b}_{t-1} +
                g_t/ \big(\sqrt{\tilde{v_t}} +  \epsilon \big)                                   \\
            &\hspace{10mm} \theta_t \leftarrow \theta_{t-1} - \gamma \textbf{b}_t                \\
            &\hspace{5mm} else                                                                   \\
            &\hspace{10mm}\theta_t      \leftarrow   \theta_{t-1} -
                \gamma  g_t/ \big(\sqrt{\tilde{v_t}} + \epsilon \big)  \hspace{3mm}              \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to
    `lecture notes <https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ by G. Hinton.
    and centered version `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.
    The implementation here takes the square root of the gradient average before
    adding epsilon (note that TensorFlow interchanges these two operations). The effective
    learning rate is thus :math:`\gamma/(\sqrt{v} + \epsilon)` where :math:`\gamma`
    is the scheduled learning rate and :math:`v` is the weighted moving average
    of the squared gradient.
    a  
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        momentum (float, optional): momentum factor (default: 0)
        alpha (float, optional): smoothing constant (default: 0.99)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        centered (bool, optional) : if ``True``, compute the centered RMSProp,
            the gradient is normalized by an estimation of its variance
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        z

    )r   rC   rD   rF   rE   rG   r   r   r   r   r   r    r   r   r!   rH   c       
         C   s  t | D ]\}}|| }tj sZ|rZt }|jj|jjkrJ|jj|ksZtd| d|| }|sj|n| }|| }|d7 }|	dkr|j||	d}t	|}|rt
|}t
|}t
|}||j||d| d |r|| }|rt
|}||d|  |j||dd }n| }|r8||}n
||}|
dkr|| }|rdt
|}||
|| |j|| d q|j||| d qd S )NIIf capturable=True, params and state_steps must be on supported devices: .r   r   r   value)	enumerater7   _utilsis_compilingr   r/   typeAssertionErroraddr?   Zview_as_realZmul_Zaddcmul_Zlerp_ZaddcmulZsqrt_sqrtZadd_Zaddcdiv_)r   rC   rD   rF   rE   rG   r   r   r   r   r   r    r   r   r!   rH   iparamr,   capturable_supported_devicesr>   r<   Zis_complex_paramr=   avgbufr*   r*   r+   _single_tensor_rmsprop  sN    









rb   c       
            sj  t | dkrd S |rtdtj s\|r\t  t fddt| |D s\td  dt	| |||||g}|
 D ]\\}}}}}}}|r||g}|
dkr|| |r|| t|f|  |rt|}|d jrtj|tjddd	dd
 nt|d |	dkr<|r,tj|||	d
 ntj|||	d
}t|| tj|||d| d |rt||d|  tj|||dd}t| t|| nt|}t|| |
dkrt||
 t||| |r
t|tjr
t|| }t|| ntj||| d
 qz|rPt|tjrPt||  t||| qztj|||| d qzd S )Nr   z#_foreach ops don't support autogradc                 3   s.   | ]&\}}|j j|j jko$|j j kV  qd S N)r/   rY   ).0r;   r,   r_   r*   r+   	<genexpr>c  s   z(_multi_tensor_rmsprop.<locals>.<genexpr>rP   rQ   g      ?cpu)r/   rR   r   rS   rU   )r6   rZ   r7   rW   rX   r   allzipr   Z"_group_tensors_by_device_and_dtypevaluesr@   r   Z_foreach_negZis_cpuZ_foreach_add_r9   Z_foreach_addZ_foreach_mul_Z_foreach_addcmul_Z_foreach_lerp_Z_foreach_addcmulZ_foreach_sqrt_Z_foreach_sqrtZ_foreach_addcdiv_
isinstancer   Z_foreach_mulZ_foreach_div_)r   rC   rD   rF   rE   rG   r   r   r   r   r   r    r   r   r!   rH   Zgrouped_tensorsZgrouped_paramsZgrouped_gradsZgrouped_square_avgsZgrouped_grad_avgsZgrouped_momentum_buffer_listZgrouped_state_steps_Zstate_and_gradsr`   Zmomentum_lrr*   re   r+   _multi_tensor_rmspropH  s    





  
        


  rm   )Zsingle_tensor_fnF)r   rC   rD   rF   rE   rG   r   r   r   r!   rH   r   r   r   r   r   r    c                C   s   t j s$tdd |D s$td|dkr>t| |dd\}}|rTt j rTtd|rht j sht}nt	}|| |||||||||||||	||
d dS )	zsFunctional API that performs rmsprop algorithm computation.
    See :class:`~torch.optim.RMSProp` for details.
    c                 s   s   | ]}t |tjV  qd S rc   )rk   r7   r   )rd   tr*   r*   r+   rf     s    zrmsprop.<locals>.<genexpr>zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)Z	use_fusedz6torch.jit.script not supported with foreach optimizers)
r   r   r   r   r   r    r   r!   r   rH   )
r7   rW   rX   rh   rA   r   ZjitZis_scriptingrm   rb   )r   rC   rD   rF   rE   rG   r   r   r   r!   rH   r   r   r   r   r   r    rl   funcr*   r*   r+   r     sF      
)NFFFF)typingr   r   r7   r   Z	optimizerr   r   r   r	   r
   r   r   r   r   r   r   r   __all__r   __doc__r8   rN   rb   rm   r   r*   r*   r*   r+   <module>   s   8 *+BHs
     