U
    hA                  	   @   s  d dl mZ d dlmZmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZ dddddddgZG dd dejZG dd dejZ G dd dejZ!G dd dej"Z#G dd dej"Z$G dd dejZ%G dd  d ejZ&G d!d dej"Z'e	e
e#e$f  ee	e
ee!e f   ee( ed"ej"f ee e)ee'd#d$d%Z*d&ed'd(d)Z+G d*d deZ,G d+d deZ-G d,d deZ.e ed-e,j/fd.dd/d0ee, e)ee'd1d2dZ0e ed-e-j/fd.dd/d0ee- e)ee'd1d3dZ1e ed-e.j/fd.dd/d0ee. e)ee'd1d4dZ2dd5lm3Z3 e3e,j/j4e-j/j4e.j/j4d6Z5dS )7    )partial)AnyCallableListOptionalSequenceTupleTypeUnionN)Tensor   )VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interfaceVideoResNetR3D_18_WeightsMC3_18_WeightsR2Plus1D_18_Weightsr3d_18mc3_18r2plus1d_18c                       sP   e Zd Zd	eeee eedd fddZeeeeeef dddZ  Z	S )
Conv3DSimpleN   	in_planes
out_planes	midplanesstridepaddingreturnc                    s   t  j||d||dd d S )N)r   r   r   FZin_channelsZout_channelskernel_sizer#   r$   biassuper__init__selfr    r!   r"   r#   r$   	__class__ Q/var/www/html/venv/lib/python3.8/site-packages/torchvision/models/video/resnet.pyr+      s    zConv3DSimple.__init__r#   r%   c                 C   s
   | | | fS Nr0   r#   r0   r0   r1   get_downsample_stride'   s    z"Conv3DSimple.get_downsample_stride)Nr   r   
__name__
__module____qualname__intr   r+   staticmethodr   r5   __classcell__r0   r0   r.   r1   r      s            r   c                       sL   e Zd Zd	eeeeedd fddZeeeeeef dddZ  ZS )
Conv2Plus1Dr   Nr   c                    s`   t  tj||dd||fd||fddt|tjddtj||d|ddf|ddfdd d S )	Nr   r   r   r   r   Fr'   r#   r$   r(   TZinplacer   r   r   r*   r+   nnConv3dBatchNorm3dReLUr,   r.   r0   r1   r+   -   s(    
     zConv2Plus1D.__init__r2   c                 C   s
   | | | fS r3   r0   r4   r0   r0   r1   r5   >   s    z!Conv2Plus1D.get_downsample_stride)r   r   )	r7   r8   r9   r:   r+   r;   r   r5   r<   r0   r0   r.   r1   r=   ,   s   r=   c                       sP   e Zd Zd	eeee eedd fddZeeeeeef dddZ  Z	S )
Conv3DNoTemporalNr   r   c                    s(   t  j||dd||fd||fdd d S )Nr>   r   r   Fr&   r)   r,   r.   r0   r1   r+   D   s    zConv3DNoTemporal.__init__r2   c                 C   s
   d| | fS Nr   r0   r4   r0   r0   r1   r5   Q   s    z&Conv3DNoTemporal.get_downsample_stride)Nr   r   r6   r0   r0   r.   r1   rG   C   s            rG   c                       sR   e Zd ZdZd
eeedejf eeej dd fddZ	e
e
ddd	Z  ZS )
BasicBlockr   N.inplanesplanesconv_builderr#   
downsampler%   c                    s   || d d d |d d d|   }t    t|||||t|tjdd| _t||||t|| _tjdd| _|| _	|| _
d S )Nr   Tr@   )r*   r+   rC   
SequentialrE   rF   conv1conv2relurN   r#   r-   rK   rL   rM   r#   rN   r"   r.   r0   r1   r+   Z   s    (
  
zBasicBlock.__init__xr%   c                 C   sB   |}|  |}| |}| jd k	r,| |}||7 }| |}|S r3   )rP   rQ   rN   rR   r-   rU   Zresidualoutr0   r0   r1   forwardm   s    




zBasicBlock.forward)r   Nr7   r8   r9   	expansionr:   r   rC   Moduler   r+   r   rX   r<   r0   r0   r.   r1   rI   V   s     rI   c                       sR   e Zd ZdZdeeedejf eeej dd fddZ	e
e
dd	d
Z  ZS )
Bottleneck   r   N.rJ   c                    s   t    || d d d |d d d|   }ttj||dddt|tjdd| _t|||||t|tjdd| _ttj||| j	 dddt|| j	 | _
tjdd| _|| _|| _d S )Nr   r   F)r'   r(   Tr@   )r*   r+   rC   rO   rD   rE   rF   rP   rQ   rZ   conv3rR   rN   r#   rS   r.   r0   r1   r+   ~   s&    	
(  
  
zBottleneck.__init__rT   c                 C   sL   |}|  |}| |}| |}| jd k	r6| |}||7 }| |}|S r3   )rP   rQ   r^   rN   rR   rV   r0   r0   r1   rX      s    





zBottleneck.forward)r   NrY   r0   r0   r.   r1   r\   {   s     r\   c                       s&   e Zd ZdZdd fddZ  ZS )	BasicStemz$The default conv-batchnorm-relu stemNr%   c              
      s4   t  tjdddddddtdtjdd	 d S )
Nr   @   )r      rb   r   r   r   r>   Fr?   Tr@   rB   r-   r.   r0   r1   r+      s
    
zBasicStem.__init__r7   r8   r9   __doc__r+   r<   r0   r0   r.   r1   r_      s   r_   c                       s&   e Zd ZdZdd fddZ  ZS )R2Plus1dStemzRR(2+1)D stem is different than the default one as it uses separated 3D convolutionNr`   c                    sZ   t  tjdddddddtdtjdd	tjdd
dddddtd
tjdd	 d S )Nr   -   )r   rb   rb   rc   )r   r   r   Fr?   Tr@   ra   rA   r   r   r   )r   r   r   rB   rd   r.   r0   r1   r+      s    

zR2Plus1dStem.__init__re   r0   r0   r.   r1   rg      s   rg   c                	       s   e Zd Zdeeeef  eeeee	e
f   ee edejf eedd fddZeedd	d
Zdeeeef  eeee	e
f  eeeejdddZ  ZS )r     F.N)blockconv_makerslayersstemnum_classeszero_init_residualr%   c                    s  t    t|  d| _| | _| j||d d|d dd| _| j||d d|d dd| _| j||d d|d dd| _| j||d d	|d dd| _	t
d
| _t
d	|j || _|  D ]}t|t
jrt
jj|jddd |jdk	rbt
j|jd qt|t
jr4t
j|jd t
j|jd qt|t
jrt
j|jdd t
j|jd q|r|  D ]$}t|trrt
j|jjd qrdS )a^  Generic resnet video generator.

        Args:
            block (Type[Union[BasicBlock, Bottleneck]]): resnet building block
            conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator
                function for each layer
            layers (List[int]): number of blocks per layer
            stem (Callable[..., nn.Module]): module specifying the ResNet stem.
            num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
            zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
        ra   r   r   r4      r      r   i   ri   Zfan_outrR   )modeZnonlinearityNg{Gz?)r*   r+   r   rK   rn   _make_layerlayer1layer2layer3layer4rC   ZAdaptiveAvgPool3davgpoolZLinearrZ   fcmodules
isinstancerD   initZkaiming_normal_weightr(   Z	constant_rE   Znormal_r\   Zbn3)r-   rk   rl   rm   rn   ro   rp   mr.   r0   r1   r+      s2    
zVideoResNet.__init__rT   c                 C   sT   |  |}| |}| |}| |}| |}| |}|d}| |}|S rH   )rn   ru   rv   rw   rx   ry   flattenrz   )r-   rU   r0   r0   r1   rX      s    







zVideoResNet.forwardr   )rk   rM   rL   blocksr#   r%   c           
   	   C   s   d }|dks| j ||j krV||}ttj| j ||j d|ddt||j }g }||| j |||| ||j | _ td|D ]}	||| j || qtj| S )Nr   F)r'   r#   r(   )	rK   rZ   r5   rC   rO   rD   rE   appendrange)
r-   rk   rM   rL   r   r#   rN   Z	ds_striderm   ir0   r0   r1   rt   	  s    
zVideoResNet._make_layer)rj   F)r   )r7   r8   r9   r	   r
   rI   r\   r   r   rG   r=   r   r:   r   rC   r[   boolr+   r   rX   rO   rt   r<   r0   r0   r.   r1   r      s*     4 .)rk   rl   rm   rn   weightsprogresskwargsr%   c                 K   sP   |d k	rt |dt|jd  t| |||f|}|d k	rL||j|dd |S )Nro   
categoriesT)r   Z
check_hash)r   lenmetar   Zload_state_dictZget_state_dict)rk   rl   rm   rn   r   r   r   modelr0   r0   r1   _video_resnet#  s    	r   )r   r   zKhttps://github.com/pytorch/vision/tree/main/references/video_classificationzThe weights reproduce closely the accuracy of the paper. The accuracies are estimated on video-level with parameters `frame_rate=15`, `clips_per_video=5`, and `clip_len=16`.)Zmin_sizer   ZrecipeZ_docsc                	   @   s@   e Zd Zedeedddeddddd	id
dddZeZdS )r   z7https://download.pytorch.org/models/r3d_18-b3b3357e.pthp   r   rq      Z	crop_sizeZresize_sizeiP5Kinetics-400gO@g-T@zacc@1zacc@5gK7YD@g"_@Z
num_paramsZ_metricsZ_ops
_file_sizeurlZ
transformsr   N	r7   r8   r9   r   r   r   _COMMON_METAKINETICS400_V1DEFAULTr0   r0   r0   r1   r   B  s   c                	   @   s@   e Zd Zedeedddeddddd	id
dddZeZdS )r   z7https://download.pytorch.org/models/mc3_18-a90a0ba3.pthr   r   r   iPu r   g{GO@gQU@r   gClE@gtVF@r   r   Nr   r0   r0   r0   r1   r   V  s   c                	   @   s@   e Zd Zedeedddeddddd	id
dddZeZdS )r   z<https://download.pytorch.org/models/r2plus1d_18-91a641e6.pthr   r   r   ir   gʡP@g33333U@r   gOnBD@g1Z^@r   r   Nr   r0   r0   r0   r1   r   j  s   Z
pretrained)r   T)r   r   )r   r   r   r%   c                 K   s.   t | } tttgd ddddgt| |f|S )a  Construct 18 layer Resnet3D model.

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R3D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R3D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R3D_18_Weights
        :members:
    r]   r   )r   verifyr   rI   r   r_   r   r   r   r0   r0   r1   r   ~  s    

c                 K   s4   t | } tttgtgd  ddddgt| |f|S )a  Construct 18 layer Mixed Convolution network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.MC3_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MC3_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MC3_18_Weights
        :members:
    r   r   )r   r   r   rI   r   rG   r_   r   r0   r0   r1   r     s    

c                 K   s.   t | } tttgd ddddgt| |f|S )a  Construct 18 layer deep R(2+1)D network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R2Plus1D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R2Plus1D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R2Plus1D_18_Weights
        :members:
    r]   r   )r   r   r   rI   r=   rg   r   r0   r0   r1   r     s    

)
_ModelURLs)r   r   r   )6	functoolsr   typingr   r   r   r   r   r   r	   r
   Ztorch.nnrC   Ztorchr   Ztransforms._presetsr   utilsr   Z_apir   r   r   _metar   Z_utilsr   r   __all__rD   r   rO   r=   rG   r[   rI   r\   r_   rg   r   r:   r   r   r   r   r   r   r   r   r   r   r   r   Z
model_urlsr0   r0   r0   r1   <module>   sr   (%1_$#$#$$