U
    h                     @   s   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlmZ ddlmZmZ ddlmZ ddlmZ G d	d
 d
eZdS )    N)Path)AnyCallableDictListOptionalTupleUnion)Tensor   )find_classesmake_dataset)
VideoClips)VisionDatasetc                       s   e Zd ZdZdeeef eeeee ee	ee
 eeeef  eeeeeedd fdd	Zeeeef d
ddZee eee	ee dddZed
ddZeeeeef dddZ  ZS )UCF101a  
    `UCF101 <https://www.crcv.ucf.edu/data/UCF101.php>`_ dataset.

    UCF101 is an action recognition video dataset.
    This dataset consider every video as a collection of video clips of fixed size, specified
    by ``frames_per_clip``, where the step in frames between each clip is given by
    ``step_between_clips``. The dataset itself can be downloaded from the dataset website;
    annotations that ``annotation_path`` should be pointing to can be downloaded from `here
    <https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip>`_.

    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
    elements will come from video 1, and the next three elements from video 2.
    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
    frames in a video might be present.

    Internally, it uses a VideoClips object to handle clip creation.

    Args:
        root (str or ``pathlib.Path``): Root directory of the UCF101 Dataset.
        annotation_path (str): path to the folder containing the split files;
            see docstring above for download instructions of these files
        frames_per_clip (int): number of frames in a clip.
        step_between_clips (int, optional): number of frames between each clip.
        fold (int, optional): which fold to use. Should be between 1 and 3.
        train (bool, optional): if ``True``, creates a dataset from the train split,
            otherwise from the ``test`` split.
        transform (callable, optional): A function/transform that takes in a TxHxWxC video
            and returns a transformed version.
        output_format (str, optional): The format of the output video tensors (before transforms).
            Can be either "THWC" (default) or "TCHW".

    Returns:
        tuple: A 3-tuple with the following entries:

            - video (Tensor[T, H, W, C] or Tensor[T, C, H, W]): The `T` video frames
            -  audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
               and `L` is the number of points
            - label (int): class of the video clip
    r   NTr   THWC)rootannotation_pathframes_per_clipstep_between_clips
frame_ratefoldtrain	transform_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_audio_samplesoutput_formatreturnc                    s   t  | d|  kr dks0n td| d}|| _|| _t| j\| _}t| j||d d| _	dd | j	D }t
|||||	|
|||||d}|| _| ||||| _|| j| _|| _d S )	Nr      z$fold should be between 1 and 3, got )Zavi)Zis_valid_filec                 S   s   g | ]}|d  qS )r    .0xr#   r#   M/var/www/html/venv/lib/python3.8/site-packages/torchvision/datasets/ucf101.py
<listcomp>R   s     z#UCF101.__init__.<locals>.<listcomp>)r   r   r   r   r   r    )super__init__
ValueErrorr   r   r   r   classesr   samplesr   full_video_clips_select_foldindicesZsubsetvideo_clipsr   )selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    
extensionsZclass_to_idx
video_listr1   	__class__r#   r'   r*   6   s4    zUCF101.__init__)r!   c                 C   s   | j jS N)r.   metadatar2   r#   r#   r'   r8   h   s    zUCF101.metadata)r4   r   r   r   r!   c           
   	      s   |rdnd}| d|dd}t j||}t  t|8}| }dd |D }fdd|D } | W 5 Q R X  fd	dttD }	|	S )
Nr   testlist02dz.txtc                 S   s   g | ]}|  d d qS ) r   )stripsplitr$   r#   r#   r'   r(   s   s     z'UCF101._select_fold.<locals>.<listcomp>c                    s&   g | ]}t jj jf|d  qS )/)ospathjoinr   r?   r$   r9   r#   r'   r(   t   s     c                    s   g | ]}|  kr|qS r#   r#   )r%   i)selected_filesr4   r#   r'   r(   v   s      )	rA   rB   rC   setopen	readlinesupdaterangelen)
r2   r4   r   r   r   namefZfiddatar0   r#   )rE   r2   r4   r'   r/   l   s    
zUCF101._select_foldc                 C   s
   | j  S r7   )r1   Z	num_clipsr9   r#   r#   r'   __len__y   s    zUCF101.__len__)idxr!   c                 C   sF   | j |\}}}}| j| j|  d }| jd k	r<| |}|||fS )Nr   )r1   Zget_clipr-   r0   r   )r2   rP   ZvideoZaudioinfoZ	video_idxlabelr#   r#   r'   __getitem__|   s
    

zUCF101.__getitem__)r   Nr   TNNr   r   r   r   r   r   )__name__
__module____qualname____doc__r	   strr   intr   boolr   r   r   r*   propertyr8   r   r/   rO   r   r
   rS   __classcell__r#   r#   r5   r'   r      sH   .            
2r   )rA   pathlibr   typingr   r   r   r   r   r   r	   Ztorchr
   folderr   r   Zvideo_utilsr   Zvisionr   r   r#   r#   r#   r'   <module>   s   $