"""This file contains utilities for initializing neural network parameters."""
import math
import warnings

from torch import Tensor
import torch
from typing import Optional as _Optional


def _no_grad_uniform_(tensor, a, b, generator=None):
    with torch.no_grad():
        return tensor.uniform_(a, b, generator=generator)


def _no_grad_normal_(tensor, mean, std, generator=None):
    with torch.no_grad():
        return tensor.normal_(mean, std, generator=generator)


def _no_grad_trunc_normal_(tensor, mean, std, a, b, generator=None):
    # Samples a truncated normal by drawing uniformly on the CDF interval
    # [cdf(a), cdf(b)] and mapping back through the inverse CDF.
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn(
            "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
            "The distribution of values may be incorrect.",
            stacklevel=2,
        )

    with torch.no_grad():
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [2l-1, 2u-1], the erfinv domain
        tensor.uniform_(2 * l - 1, 2 * u - 1, generator=generator)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def _no_grad_fill_(tensor, val):
    with torch.no_grad():
        return tensor.fill_(val)


def _no_grad_zero_(tensor):
    with torch.no_grad():
        return tensor.zero_()


def calculate_gain(nonlinearity, param=None):
    r"""Return the recommended gain value for the given nonlinearity function.

    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    SELU              :math:`\frac{3}{4}`
    ================= ====================================================

    .. warning::
        In order to implement `Self-Normalizing Neural Networks`_,
        you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
        This gives the initial weights a variance of ``1 / N``,
        which is necessary to induce a stable fixed point in the forward pass.
        In contrast, the default gain for ``SELU`` sacrifices the normalization
        effect for more stable gradient flow in rectangular layers.

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function

    Examples:
        >>> gain = nn.init.calculate_gain('leaky_relu', 0.2)  # leaky_relu with negative_slope=0.2

    .. _Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html
    ZlinearZconv1dZconv2dZconv3dZconv_transpose1dZconv_transpose2dZconv_transpose3dZsigmoidr    tanhg?Zrelur   
leaky_reluN{Gz?znegative_slope z not a valid numberr   Zselug      ?zUnsupported nonlinearity )r   r   
isinstanceboolintfloat
ValueError)nonlinearityparamZ
linear_fnsZnegative_sloper   r   r   calculate_gainE   s"    "
r:           r   )r   r   r   r   returnc                 C   s4   t j| r&t jjt| f| |||dS t| |||S )a  Fill the input Tensor with values drawn from the uniform distribution.

    :math:`\mathcal{U}(a, b)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.uniform_(w)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            uniform_, (tensor,), tensor=tensor, a=a, b=b, generator=generator
        )
    return _no_grad_uniform_(tensor, a, b, generator)
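

# Editor's note: the sketch below is illustrative and not part of the original
# module (the _demo_* name is hypothetical). It shows how an explicit
# torch.Generator makes an initializer reproducible without touching the
# global RNG state.
def _demo_seeded_uniform():
    g = torch.Generator().manual_seed(0)
    w = torch.empty(3, 5)
    uniform_(w, a=-0.1, b=0.1, generator=g)  # identical values on every call
    return w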


def normal_(
    tensor: Tensor,
    mean: float = 0.0,
    std: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input Tensor with values drawn from the normal distribution.

    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            normal_, (tensor,), tensor=tensor, mean=mean, std=std, generator=generator
        )
    return _no_grad_normal_(tensor, mean, std, generator)


def trunc_normal_(
    tensor: Tensor,
    mean: float = 0.0,
    std: float = 1.0,
    a: float = -2.0,
    b: float = 2.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input Tensor with values drawn from a truncated normal distribution.

    The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b, generator=generator)


def constant_(tensor: Tensor, val: float) -> Tensor:
    r"""Fill the input Tensor with the value :math:`\text{val}`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.constant_(w, 0.3)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            constant_, (tensor,), tensor=tensor, val=val
        )
    return _no_grad_fill_(tensor, val)
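

# Editor's note: illustrative sketch, not part of the original module. A
# constant fill is a common choice for gate biases, e.g. initializing an LSTM
# forget-gate bias to 1 so the gate starts open.
def _demo_constant_bias():
    bias = torch.empty(256)
    constant_(bias, 1.0)  # every element becomes 1.0
    return bias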


def ones_(tensor: Tensor) -> Tensor:
    r"""Fill the input Tensor with the scalar value `1`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.ones_(w)
    """
    return _no_grad_fill_(tensor, 1.0)


def zeros_(tensor: Tensor) -> Tensor:
    r"""Fill the input Tensor with the scalar value `0`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.zeros_(w)
    """
    return _no_grad_zero_(tensor)
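

# Editor's note: illustrative sketch, not part of the original module. A common
# affine-normalization setup: the scale starts as the identity, the shift at zero.
def _demo_norm_affine_init():
    weight = torch.empty(64)
    bias = torch.empty(64)
    ones_(weight)
    zeros_(bias)
    return weight, bias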


def eye_(tensor):
    r"""Fill the 2-dimensional input `Tensor` with the identity matrix.

    Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    with torch.no_grad():
        torch.eye(*tensor.shape, out=tensor, requires_grad=tensor.requires_grad)
    return tensor
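

# Editor's note: illustrative sketch, not part of the original module. A square
# weight initialized with eye_ makes a Linear layer start as the identity map.
def _demo_identity_linear():
    w = torch.empty(8, 8)
    eye_(w)
    x = torch.randn(2, 8)
    assert torch.allclose(x @ w.t(), x)  # y = x @ W^T == x
    return w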


def dirac_(tensor, groups=1):
    r"""Fill the {3, 4, 5}-dimensional input `Tensor` with the Dirac delta function.

    Preserves the identity of the inputs in `Convolutional`
    layers, where as many input channels are preserved as possible. In case
    of groups>1, each group of channels preserves identity.

    Args:
        tensor: a {3, 4, 5}-dimensional `torch.Tensor`
        groups (int, optional): number of groups in the conv layer (default: 1)
    Examples:
        >>> w = torch.empty(3, 16, 5, 5)
        >>> nn.init.dirac_(w)
        >>> w = torch.empty(3, 24, 5, 5)
        >>> nn.init.dirac_(w, 3)
    """
    dimensions = tensor.ndimension()
    if dimensions not in [3, 4, 5]:
        raise ValueError("Only tensors with 3, 4, or 5 dimensions are supported")

    sizes = tensor.size()

    if sizes[0] % groups != 0:
        raise ValueError('dim 0 must be divisible by groups')

    out_chans_per_grp = sizes[0] // groups
    min_dim = min(out_chans_per_grp, sizes[1])

    with torch.no_grad():
        tensor.zero_()

        for g in range(groups):
            for d in range(min_dim):
                if dimensions == 3:  # Temporal convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2] = 1
                elif dimensions == 4:  # Spatial convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2,
                           tensor.size(3) // 2] = 1
                else:  # Volumetric convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2,
                           tensor.size(3) // 2, tensor.size(4) // 2] = 1
    return tensor


def _calculate_fan_in_and_fan_out(tensor):
    dimensions = tensor.dim()
    if dimensions < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )

    num_input_fmaps = tensor.size(1)
    num_output_fmaps = tensor.size(0)
    receptive_field_size = 1
    if tensor.dim() > 2:
        # accumulate the product of the kernel dimensions manually
        for s in tensor.shape[2:]:
            receptive_field_size *= s
    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out


def xavier_uniform_(
    tensor: Tensor,
    gain: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input `Tensor` with values using a Xavier uniform distribution.

    The method is described in `Understanding the difficulty of training
    deep feedforward neural networks` - Glorot, X. & Bengio, Y. (2010).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation

    return _no_grad_uniform_(tensor, -a, a, generator)


def xavier_normal_(
    tensor: Tensor,
    gain: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input `Tensor` with values using a Xavier normal distribution.

    The method is described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010). The resulting tensor
    will have values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_normal_(w)
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))

    return _no_grad_normal_(tensor, 0.0, std, generator)


def _calculate_correct_fan(tensor, mode):
    mode = mode.lower()
    valid_modes = ['fan_in', 'fan_out']
    if mode not in valid_modes:
        raise ValueError(f"Mode {mode} not supported, please use one of {valid_modes}")

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    return fan_in if mode == 'fan_in' else fan_out
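

# Editor's note: illustrative sketch, not part of the original module. For a
# conv weight of shape (out_ch, in_ch, kH, kW), fan_in = in_ch * kH * kW and
# fan_out = out_ch * kH * kW; Xavier then draws from U(-a, a) with
# a = gain * sqrt(6 / (fan_in + fan_out)).
def _demo_fan_computation():
    w = torch.empty(16, 8, 3, 3)
    fan_in, fan_out = _calculate_fan_in_and_fan_out(w)
    assert (fan_in, fan_out) == (8 * 9, 16 * 9)
    xavier_uniform_(w)
    return fan_in, fan_out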


def kaiming_uniform_(
    tensor: Tensor,
    a: float = 0,
    mode: str = 'fan_in',
    nonlinearity: str = 'leaky_relu',
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the input `Tensor` with values using a Kaiming uniform distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            kaiming_uniform_,
            (tensor,),
            tensor=tensor,
            a=a,
            mode=mode,
            nonlinearity=nonlinearity,
            generator=generator,
        )

    if 0 in tensor.shape:
        warnings.warn("Initializing zero-element tensors is a no-op")
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    with torch.no_grad():
        return tensor.uniform_(-bound, bound, generator=generator)



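# Editor's note: illustrative sketch, not part of the original module. With the
# default nonlinearity='leaky_relu', `a` is passed to calculate_gain and the
# sampling bound is gain * sqrt(3 / fan); a=sqrt(5) is the value nn.Linear uses
# for its own weight initialization.
def _demo_kaiming_bound():
    w = torch.empty(128, 64)
    kaiming_uniform_(w, a=math.sqrt(5))
    bound = calculate_gain('leaky_relu', math.sqrt(5)) * math.sqrt(3.0 / 64)
    assert float(w.abs().max()) <= bound + 1e-6
    return bound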

def kaiming_normal_(
    tensor: Tensor,
    a: float = 0,
    mode: str = 'fan_in',
    nonlinearity: str = 'leaky_relu',
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the input `Tensor` with values using a Kaiming normal distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
    r   rd   r   N)
rJ   r#   r$   rb   r:   r   r   r   r   r   )r   r   ra   r8   r   rf   r\   r   r   r   r   kaiming_normal_  s    #




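# Editor's note: illustrative sketch, not part of the original module.
# mode='fan_out' preserves the magnitude of gradients in the backward pass,
# the convention used for conv layers in He et al. (2015).
def _demo_kaiming_conv():
    w = torch.empty(64, 3, 7, 7)
    kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
    # resulting std is sqrt(2) / sqrt(fan_out), with fan_out = 64 * 7 * 7
    return w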
rh   r   c           
   	   C   s   |   dk rtd|  dkr$| S | d}|  | }| ||jdd|d}||k rb|  tj	|\}}t
|d}| }	||	9 }||k r|  t   | || | | W 5 Q R X | S )a  Fill the input `Tensor` with a (semi) orthogonal matrix.

    Described in `Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    if tensor.numel() == 0:
        # no-op
        return tensor
    rows = tensor.size(0)
    cols = tensor.numel() // rows
    flattened = tensor.new(rows, cols).normal_(0, 1, generator=generator)

    if rows < cols:
        flattened.t_()

    # Compute the qr factorization
    q, r = torch.linalg.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph

    if rows < cols:
        q.t_()

    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor


def sparse_(
    tensor,
    sparsity,
    std=0.01,
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the 2D input `Tensor` as a sparse matrix.

    The non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in `Deep learning via
    Hessian-free optimization` - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    rows, cols = tensor.shape
    num_zeros = int(math.ceil(sparsity * rows))

    with torch.no_grad():
        tensor.normal_(0, std, generator=generator)
        for col_idx in range(cols):
            row_indices = torch.randperm(rows)
            zero_indices = row_indices[:num_zeros]
            tensor[zero_indices, col_idx] = 0
    return tensor

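# Editor's note: illustrative sketch, not part of the original module. With
# sparsity=0.1 and 10 rows, ceil(0.1 * 10) = 1 entry per column is zeroed.
def _demo_sparse_columns():
    w = torch.empty(10, 4)
    sparse_(w, sparsity=0.1)
    assert all(int((w[:, c] == 0).sum()) >= 1 for c in range(4))
    return w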

def _make_deprecate(meth):
    new_name = meth.__name__
    old_name = new_name[:-1]

    def deprecated_init(*args, **kwargs):
        warnings.warn(
            f"`nn.init.{old_name}` is now deprecated in favor of `nn.init.{new_name}`.",
            FutureWarning,
            stacklevel=2,
        )
        return meth(*args, **kwargs)

    deprecated_init.__doc__ = fr"""
    {old_name}(...)

    .. warning::
        This method is now deprecated in favor of :func:`torch.nn.init.{new_name}`.

    See :func:`~torch.nn.init.{new_name}` for details."""
    deprecated_init.__name__ = old_name
    return deprecated_init
r}   )N)N)N)N)r;   r   N)r;   r   N)r;   r   r@   r   N)r    )r   N)r   N)r   rY   r1   N)r   rY   r1   N)r    N)r2   N)-r|   r   r#   r   r   typingr   Z	_Optionalr   r   r(   r,   r/   r:   r6   	Generatorr	   r   rA   rB   rC   rD   rK   rV   r[   r^   r_   rb   strre   rh   rp   rr   r}   uniformnormalZconstantrI   ZdiracZxavier_uniformZxavier_normalZkaiming_uniformZkaiming_normalZ
orthogonalsparser   r   r   r   <module>   s   


#
:           
,     !      :    /  6  '
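

# Editor's note: illustrative sketch, not part of the original module. In
# practice these initializers are applied to a whole network via Module.apply.
def _demo_apply_to_module():
    import torch.nn as nn  # imported lazily; this file is torch.nn.init itself

    def init_weights(m):
        if isinstance(m, nn.Linear):
            kaiming_uniform_(m.weight, nonlinearity='relu')
            zeros_(m.bias)

    net = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
    net.apply(init_weights)
    return net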