from typing import List, Optional, Tuple

import torch
from torch import Tensor

from .optimizer import (
    _capturable_doc,
    _default_to_fused_or_foreach,
    _differentiable_doc,
    _disable_dynamo_if_unsupported,
    _foreach_doc,
    _get_capturable_supported_devices,
    _get_scalar_dtype,
    _maximize_doc,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Rprop", "rprop"]


class Rprop(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: float = 1e-2,
        etas: Tuple[float, float] = (0.5, 1.2),
        step_sizes: Tuple[float, float] = (1e-6, 50),
        *,
        capturable: bool = False,
        foreach: Optional[bool] = None,
        maximize: bool = False,
        differentiable: bool = False,
    ):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 < etas[0] < 1.0 < etas[1]:
            raise ValueError(f"Invalid eta values: {etas[0]}, {etas[1]}")

        defaults = dict(
            lr=lr,
            etas=etas,
            step_sizes=step_sizes,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            capturable=capturable,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            group.setdefault("capturable", False)
            for p in group["params"]:
                p_state = self.state.get(p, [])
                if len(p_state) != 0 and not torch.is_tensor(p_state["step"]):
                    step_val = float(p_state["step"])
                    p_state["step"] = (
                        torch.tensor(
                            step_val, dtype=_get_scalar_dtype(), device=p.device
                        )
                        if group["capturable"]
                        else torch.tensor(step_val, dtype=_get_scalar_dtype())
                    )

    def _init_group(self, group, params, grads, prevs, step_sizes, state_steps):
        has_complex = False
        for p in group["params"]:
            if p.grad is None:
                continue
            has_complex |= torch.is_complex(p)
            params.append(p)
            grad = p.grad
            if grad.is_sparse:
                raise RuntimeError("Rprop does not support sparse gradients")

            grads.append(grad)
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state["step"] = (
                    torch.zeros((), dtype=_get_scalar_dtype(), device=p.device)
                    if group["capturable"]
                    else torch.zeros((), dtype=_get_scalar_dtype())
                )

                state["prev"] = torch.zeros_like(
                    p, memory_format=torch.preserve_format
                )
                if p.dtype.is_complex:
                    # Complex numbers behave as two independent real numbers,
                    # so the step size must not be zero for the imaginary part.
                    state["step_size"] = torch.full_like(
                        grad, complex(group["lr"], group["lr"])
                    )
                else:
                    state["step_size"] = torch.full_like(grad, group["lr"])

            prevs.append(state["prev"])
            step_sizes.append(state["step_size"])
            state_steps.append(state["step"])

        return has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        self._cuda_graph_capture_health_check()

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params: List[Tensor] = []
            grads: List[Tensor] = []
            prevs: List[Tensor] = []
            step_sizes: List[Tensor] = []
            state_steps: List[Tensor] = []

            etaminus, etaplus = group["etas"]
            step_size_min, step_size_max = group["step_sizes"]
            foreach = group["foreach"]
            maximize = group["maximize"]

            has_complex = self._init_group(
                group, params, grads, prevs, step_sizes, state_steps
            )

            rprop(
                params,
                grads,
                prevs,
                step_sizes,
                state_steps,
                step_size_min=step_size_min,
                step_size_max=step_size_max,
                etaminus=etaminus,
                etaplus=etaplus,
                foreach=foreach,
                maximize=maximize,
                differentiable=group["differentiable"],
                capturable=group["capturable"],
                has_complex=has_complex,
            )

        return loss


Rprop.__doc__ = (
    r"""Implements the resilient backpropagation algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \theta_0 \in \mathbf{R}^d \text{ (params)},f(\theta)
                \text{ (objective)},                                                             \\
            &\hspace{13mm}      \eta_{+/-} \text{ (etaplus, etaminus)}, \Gamma_{max/min}
                \text{ (step sizes)}                                                             \\
            &\textbf{initialize} :   g^0_{prev} \leftarrow 0,
                \: \eta_0 \leftarrow \text{lr (learning rate)}                                   \\
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \textbf{for} \text{  } i = 0, 1, \ldots, d-1 \: \mathbf{do}            \\
            &\hspace{10mm}  \textbf{if} \:   g^i_{prev} g^i_t  > 0                               \\
            &\hspace{15mm}  \eta^i_t \leftarrow \mathrm{min}(\eta^i_{t-1} \eta_{+},
                \Gamma_{max})                                                                    \\
            &\hspace{10mm}  \textbf{else if}  \:  g^i_{prev} g^i_t < 0                           \\
            &\hspace{15mm}  \eta^i_t \leftarrow \mathrm{max}(\eta^i_{t-1} \eta_{-},
                \Gamma_{min})                                                                    \\
            &\hspace{15mm}  g^i_t \leftarrow 0                                                   \\
            &\hspace{10mm}  \textbf{else}  \:                                                    \\
            &\hspace{15mm}  \eta^i_t \leftarrow \eta^i_{t-1}                                     \\
            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1}- \eta_t \mathrm{sign}(g_t)             \\
            &\hspace{5mm}g_{prev} \leftarrow  g_t                                                \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to the paper
    `A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.1417>`_.
    """
    + rf"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        etas (Tuple[float, float], optional): pair of (etaminus, etaplus), that
            are multiplicative increase and decrease factors
            (default: (0.5, 1.2))
        step_sizes (Tuple[float, float], optional): a pair of minimal and
            maximal allowed step sizes (default: (1e-6, 50))
        {_foreach_doc}
        {_capturable_doc}
        {_maximize_doc}
        {_differentiable_doc}

    """
)
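
# Worked illustration of the update rule above (an informal sketch; the numbers are
# made up). With the default etas=(0.5, 1.2) and a current per-element step size of
# 0.1: if the current and previous gradient entries agree in sign (their product is
# positive), the step size grows to min(0.1 * 1.2, step_size_max) = 0.12 and the
# parameter moves by 0.12 against the gradient's sign. If they disagree (product is
# negative), the step size shrinks to max(0.1 * 0.5, step_size_min) = 0.05, the
# gradient entry is zeroed so no move is made this step, and that zero is stored as
# the previous gradient, which selects the "else" branch (step size kept) next step.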

def _single_tensor_rprop(
    params: List[Tensor],
    grads: List[Tensor],
    prevs: List[Tensor],
    step_sizes: List[Tensor],
    state_steps: List[Tensor],
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
    maximize: bool,
    capturable: bool,
    differentiable: bool,
    has_complex: bool,
):
    for i, param in enumerate(params):
        grad = grads[i]
        grad = grad if not maximize else -grad
        prev = prevs[i]
        step_size = step_sizes[i]
        step = state_steps[i]

        # If compiling, the compiler will handle cudagraph checks
        if not torch._utils.is_compiling() and capturable:
            capturable_supported_devices = _get_capturable_supported_devices()
            assert (
                param.device.type == step.device.type
                and param.device.type in capturable_supported_devices
            ), f"If capturable=True, params and state_steps must be on supported devices: {capturable_supported_devices}."

        step += 1

        if torch.is_complex(param):
            grad = torch.view_as_real(grad)
            prev = torch.view_as_real(prev)
            param = torch.view_as_real(param)
            step_size = torch.view_as_real(step_size)

        if differentiable:
            sign = grad.mul(prev.clone()).sign()
        else:
            sign = grad.mul(prev).sign()

        if capturable:
            sign.copy_(torch.where(sign.gt(0), etaplus, sign))
            sign.copy_(torch.where(sign.lt(0), etaminus, sign))
            sign.copy_(torch.where(sign.eq(0), 1, sign))
        else:
            sign[sign.gt(0)] = etaplus
            sign[sign.lt(0)] = etaminus
            sign[sign.eq(0)] = 1

        # update step sizes with step size updates
        step_size.mul_(sign).clamp_(step_size_min, step_size_max)

        # for dir<0, dfdx=0
        # for dir>=0, dfdx=dfdx
        grad = grad.clone(memory_format=torch.preserve_format)
        if capturable:
            grad.copy_(torch.where(sign.eq(etaminus), 0, grad))
        else:
            grad[sign.eq(etaminus)] = 0

        # update parameters
        param.addcmul_(grad.sign(), step_size, value=-1)
        prev.copy_(grad)

def _multi_tensor_rprop(
    params: List[Tensor],
    grads: List[Tensor],
    prevs: List[Tensor],
    step_sizes: List[Tensor],
    state_steps: List[Tensor],
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
    maximize: bool,
    capturable: bool,
    differentiable: bool,
    has_complex: bool,
):
    if len(params) == 0:
        return

    assert not differentiable, "_foreach ops don't support autograd"

    # If compiling, the compiler will handle cudagraph checks
    if not torch._utils.is_compiling() and capturable:
        capturable_supported_devices = _get_capturable_supported_devices()
        assert all(
            p.device.type == step.device.type
            and p.device.type in capturable_supported_devices
            for p, step in zip(params, state_steps)
        ), f"If capturable=True, params and state_steps must be on supported devices: {capturable_supported_devices}."

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, prevs, step_sizes, state_steps]
    )
    for (
        grouped_params,
        grouped_grads,
        grouped_prevs,
        grouped_step_sizes,
        grouped_state_steps,
    ), _ in grouped_tensors.values():
        # Update steps. If steps are on CPU, foreach falls back to the slow path
        # (a for-loop calling t.add(1) over and over, wrapping 1 into a tensor each
        # time); wrapping 1 into a tensor once here, with an explicit alpha, keeps
        # us on the fast path of _foreach_add_.
        if grouped_state_steps[0].is_cpu:
            torch._foreach_add_(
                grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(grouped_state_steps, 1)

        # Handle complex params as independent real/imaginary views
        if has_complex:
            _view_as_real(
                grouped_params, grouped_grads, grouped_prevs, grouped_step_sizes
            )

        signs = torch._foreach_mul(grouped_grads, grouped_prevs)
        if maximize:
            torch._foreach_neg_(signs)

        # The previous grads are no longer needed once the sign products exist,
        # so reuse prevs' memory to hold the current grads for the next step.
        torch._foreach_copy_(grouped_prevs, grouped_grads)
        if maximize:
            torch._foreach_neg_(grouped_prevs)

        torch._foreach_sign_(signs)
        if capturable:
            for sign in signs:
                sign.copy_(torch.where(sign.gt(0), etaplus, sign))
                sign.copy_(torch.where(sign.lt(0), etaminus, sign))
                sign.copy_(torch.where(sign.eq(0), 1, sign))
        else:
            for sign in signs:
                sign[sign.gt(0)] = etaplus
                sign[sign.lt(0)] = etaminus
                sign[sign.eq(0)] = 1

        # update step sizes with step size updates
        torch._foreach_mul_(grouped_step_sizes, signs)
        for step_size in grouped_step_sizes:
            step_size.clamp_(step_size_min, step_size_max)

        # for dir<0, dfdx=0
        # for dir>=0, dfdx=dfdx
        # (grouped_prevs now holds the current grads; zero them where the sign flipped)
        grouped_prevs = list(grouped_prevs)
        for i in range(len(grouped_prevs)):
            grouped_prevs[i].copy_(
                torch.where(signs[i].eq(etaminus), 0, grouped_prevs[i])
            )

        grad_signs = [grad.sign() for grad in grouped_prevs]
        torch._foreach_addcmul_(
            grouped_params, grad_signs, grouped_step_sizes, value=-1
        )
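
# Design note (informal): the multi-tensor path above leans on torch._foreach_* ops,
# which run one batched kernel across a whole list of tensors instead of launching
# a kernel per parameter. Semantically, for example, torch._foreach_mul(xs, ys)
# matches the per-tensor loop
#
#     [x * y for x, y in zip(xs, ys)]
#
# but with far fewer kernel launches, which is why _default_to_fused_or_foreach
# (consulted by the functional API below) prefers the foreach path when the
# parameters live on a supported accelerator.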

@_disable_dynamo_if_unsupported(single_tensor_fn=_single_tensor_rprop)
def rprop(
    params: List[Tensor],
    grads: List[Tensor],
    prevs: List[Tensor],
    step_sizes: List[Tensor],
    state_steps: List[Tensor],
    # kwonly args with defaults are not supported by functions compiled with
    # torchscript issue #70627; setting these as kwargs for now as the functional
    # API is compiled by torch/distributed/optim
    foreach: Optional[bool] = None,
    capturable: bool = False,
    maximize: bool = False,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
):
    r"""Functional API that performs rprop algorithm computation.

    See :class:`~torch.optim.Rprop` for details.
    """
    if not torch._utils.is_compiling() and not all(
        isinstance(t, torch.Tensor) for t in state_steps
    ):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    if foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_rprop
    else:
        func = _single_tensor_rprop

    func(
        params,
        grads,
        prevs,
        step_sizes,
        state_steps,
        step_size_min=step_size_min,
        step_size_max=step_size_max,
        etaminus=etaminus,
        etaplus=etaplus,
        capturable=capturable,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
    )