U
    h=C                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZmZmZmZ ddlmZ edZejfeeeeedd	d
ZdejeeeejdddZG dd dZeedddZG dd dZdS )    NFraction)	AnyCallablecastDictListOptionalTupleTypeVarUnion)_probe_video_from_file_read_video_from_file
read_videoread_video_timestamps   )tqdmT)ptstimebase_fromtimebase_to
round_funcreturnc                 C   s   t | d| | }||S )zconvert pts between different time bases
    Args:
        pts: presentation timestamp, float
        timebase_from: original timebase. Fraction
        timebase_to: new timebase. Fraction
        round_func: rounding function.
    r   r   )r   r   r   r   Znew_pts r   R/var/www/html/venv/lib/python3.8/site-packages/torchvision/datasets/video_utils.pypts_convert   s    r   )tensorsizestepdilationr   c                 C   s   |   dkrtd|    | d}|  }|| || f}|||d  d  | d |f}|d dk rtd|f}t| ||S )a   
    similar to tensor.unfold, but with the dilation
    and specialized for 1d tensors

    Returns all consecutive windows of `size` elements, with
    `step` between windows. The distance between each element
    in a window is given by `dilation`.
    r   z*tensor should have 1 dimension instead of r   )dim
ValueErrorZstridenumeltorchZ
as_strided)r   r   r   r   Zo_strider"   Z
new_stridenew_sizer   r   r   unfold   s    	
 r%   c                   @   sR   e Zd ZdZee ddddZedddZee	ee e
e f d	d
dZdS )_VideoTimestampsDatasetz
    Dataset used to parallelize the reading of the timestamps
    of a list of videos, given their paths in the filesystem.

    Used in VideoClips and defined at top level, so it can be
    pickled when forking.
    N)video_pathsr   c                 C   s
   || _ d S Nr'   )selfr'   r   r   r   __init__8   s    z _VideoTimestampsDataset.__init__r   c                 C   s
   t | jS r(   lenr'   r*   r   r   r   __len__;   s    z_VideoTimestampsDataset.__len__idxr   c                 C   s   t | j| S r(   )r   r'   )r*   r2   r   r   r   __getitem__>   s    z#_VideoTimestampsDataset.__getitem__)__name__
__module____qualname____doc__r   strr+   intr0   r
   r	   floatr3   r   r   r   r   r&   /   s   r&   )xr   c                 C   s   | S )zH
    Dummy collate function to be used with _VideoTimestampsDataset
    r   )r;   r   r   r   _collate_fnB   s    r<   c                   @   s  e Zd ZdZd.ee eeee ee	ee
f  eeeeeeeedddd	Zdd
ddZe	ee
f ddddZee	ee
f d
ddZee d dddZed/ejeeee ee eejeee ejf f dddZd0eeee ddddZed
ddZed
ddZed
dd Zeeeef d!d"d#Zeeeeeeejf d$d%d&Zeeejeje	ee
f ef d!d'd(Ze	ee
f d
d)d*Ze	ee
f dd+d,d-Z dS )1
VideoClipsaE  
    Given a list of video files, computes all consecutive subvideos of size
    `clip_length_in_frames`, where the distance between each subvideo in the
    same video is defined by `frames_between_clips`.
    If `frame_rate` is specified, it will also resample all the videos to have
    the same frame rate, and the clips will refer to this frame rate.

    Creating this instance the first time is time-consuming, as it needs to
    decode all the videos in `video_paths`. It is recommended that you
    cache the results after instantiation of the class.

    Recreating the clips for different clip lengths is fast, and can be done
    with the `compute_clips` method.

    Args:
        video_paths (List[str]): paths to the video files
        clip_length_in_frames (int): size of a clip in number of frames
        frames_between_clips (int): step (in frames) between each clip
        frame_rate (float, optional): if specified, it will resample the video
            so that it has `frame_rate`, and then the clips will be defined
            on the resampled video
        num_workers (int): how many subprocesses to use for data loading.
            0 means that the data will be loaded in the main process. (default: 0)
        output_format (str): The format of the output video tensors. Can be either "THWC" (default) or "TCHW".
       r   Nr   THWC)r'   clip_length_in_framesframes_between_clips
frame_rate_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_video_max_dimension_audio_samples_audio_channelsoutput_formatr   c                 C   s   || _ || _|| _|| _|	| _|
| _|| _|| _| | _	| j	dkrTt
d| d|d krf|   n
| | | ||| d S )N)r?   TCHWz5output_format should be either 'THWC' or 'TCHW', got .)r'   rD   rE   rF   rG   rH   rI   rJ   upperrK   r!   _compute_frame_pts_init_from_metadatacompute_clips)r*   r'   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   r   r   r   r+   d   s    



zVideoClips.__init__r,   c              	      s   g | _ g | _dd l  jjjt| jd| jt	d}t
t|dT}|D ]H}|d tt| \}} fdd|D }| j | | j| qFW 5 Q R X d S )Nr   r>   )Z
batch_sizerD   Z
collate_fn)totalr   c                    s   g | ]} j | jd qS )Zdtype)	as_tensorlong).0r   r#   r   r   
<listcomp>   s     z1VideoClips._compute_frame_pts.<locals>.<listcomp>)	video_pts	video_fpsZtorch.utils.datautilsdataZ
DataLoaderr&   r'   rD   r<   r   r.   updatelistzipextend)r*   dlZpbarbatchZ	batch_ptsZ	batch_fpsr   rW   r   rO      s     
zVideoClips._compute_frame_pts)metadatar   c                 C   sV   |d | _ t| j t|d ks$t|d | _t| j t|d ksHt|d | _d S )Nr'   rY   rZ   )r'   r.   AssertionErrorrY   rZ   )r*   rc   r   r   r   rP      s
    

zVideoClips._init_from_metadatac                 C   s   | j | j| jd}|S )Nr'   rY   rZ   re   )r*   	_metadatar   r   r   rc      s
    zVideoClips.metadata)indicesr   c                    s~    fdd|D } fdd|D } fdd|D }|||d}t  | j j j| j j j j j j	 j
 jdS )Nc                    s   g | ]} j | qS r   r)   rV   ir/   r   r   rX      s     z%VideoClips.subset.<locals>.<listcomp>c                    s   g | ]} j | qS r   )rY   rh   r/   r   r   rX      s     c                    s   g | ]} j | qS r   )rZ   rh   r/   r   r   rX      s     re   )r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   )type
num_framesr   rB   rD   rE   rF   rG   rH   rI   rJ   rK   )r*   rg   r'   rY   rZ   rc   r   r/   r   subset   s,    zVideoClips.subset)rY   rk   r   fpsrB   r   c           	      C   s   |d krd}|d kr|}t | | | }ttt|||}| | } t| ||}| sft	d t
|tr|gt | }nt|||}||fS )Nr   zThere aren't enough frames in the current video to get a clip for the given clip length and frames between clips. The video (and potentially others) will be skipped.)r.   r=   _resample_video_idxr9   mathfloorr%   r"   warningswarn
isinstanceslice)	rY   rk   r   rm   rB   Ztotal_framesZ_idxsclipsidxsr   r   r   compute_clips_for_video   s     
z"VideoClips.compute_clips_for_video)rk   r   rB   r   c           	      C   s   || _ || _|| _g | _g | _t| j| jD ]6\}}| |||||\}}| j	| | j	| q,t
dd | jD }|d | _dS )a  
        Compute all consecutive sequences of clips from video_pts.
        Always returns clips of size `num_frames`, meaning that the
        last few frames in a video can potentially be dropped.

        Args:
            num_frames (int): number of frames for the clip
            step (int): distance between two clips
            frame_rate (int, optional): The frame rate
        c                 S   s   g | ]}t |qS r   r.   rV   vr   r   r   rX      s     z,VideoClips.compute_clips.<locals>.<listcomp>r   N)rk   r   rB   ru   resampling_idxsr_   rY   rZ   rw   appendr#   rT   Zcumsumtolistcumulative_sizes)	r*   rk   r   rB   rY   rm   ru   rv   Zclip_lengthsr   r   r   rQ      s    zVideoClips.compute_clipsc                 C   s   |   S r(   )	num_clipsr/   r   r   r   r0      s    zVideoClips.__len__c                 C   s
   t | jS r(   r-   r/   r   r   r   
num_videos   s    zVideoClips.num_videosc                 C   s
   | j d S )zJ
        Number of subclips that are available in the video list.
        )r~   r/   r   r   r   r     s    zVideoClips.num_clipsr1   c                 C   s6   t | j|}|dkr|}n|| j|d   }||fS )zw
        Converts a flattened representation of the indices into a video_idx, clip_idx
        representation.
        r   r   )bisectbisect_rightr~   )r*   r2   	video_idxclip_idxr   r   r   get_clip_location	  s
    zVideoClips.get_clip_location)rk   original_fpsnew_fpsr   c                 C   sL   || }|  r$t|}td d |S tj| tjd| }| tj}|S )NrS   )	
is_integerr9   rt   r#   ZarangeZfloat32rp   toint64)rk   r   r   r   rv   r   r   r   rn     s    zVideoClips._resample_video_idxc                 C   st  ||   kr&td| d|    d| |\}}| j| }| j| | }ddlm} | }|dkr| jdkrxtd| j	dkrtd| j
dkrtd	| jdkrtd
| jdkrtd|dkr|d  }|d  }	t|||	\}
}}nt|}|j}d}tt|d  }tt|d  }d\}}tdd}t|jj|jj}|jrt|jj|jj}t|||tj}t|||tj}|j}t|| j| j	| j
| j||f|| j| j||f|d\}
}}d|i}|dk	r||d< | j dk	r&| j!| | }t"|t#j$r||d  }|
| }
| j |d< t%|
| j&ksLt'|
j( d| j& | j)dkrh|
*dddd}
|
|||fS )a7  
        Gets a subclip from a list of videos.

        Args:
            idx (int): index of the subclip. Must be between 0 and num_clips().

        Returns:
            video (Tensor)
            audio (Tensor)
            info (Dict)
            video_idx (int): index of the video in `video_paths`
        zIndex z out of range (z number of clips)r   )get_video_backendZpyavz.pyav backend doesn't support _video_width != 0z/pyav backend doesn't support _video_height != 0z6pyav backend doesn't support _video_min_dimension != 0z6pyav backend doesn't support _video_max_dimension != 0z0pyav backend doesn't support _audio_samples != 0r   N)r   r   r   )
Zvideo_widthZvideo_heightZvideo_min_dimensionZvideo_max_dimensionZvideo_pts_rangevideo_timebaseZaudio_samplesZaudio_channelsZaudio_pts_rangeaudio_timebaserZ   	audio_fpsz x rL         )+r   
IndexErrorr   r'   ru   Ztorchvisionr   rE   r!   rF   rG   rH   rI   itemr   r   rZ   r   r9   r   r   	numeratordenominatorZ	has_audior   r   ro   rp   ceilZaudio_sample_rater   rJ   rB   r{   rs   r#   Tensorr.   rk   rd   shaperK   Zpermute)r*   r2   r   r   Z
video_pathZclip_ptsr   backendZ	start_ptsZend_ptsZvideoZaudioinfo_inforZ   r   Zvideo_start_ptsZvideo_end_ptsZaudio_start_ptsZaudio_end_ptsr   r   _Zresampling_idxr   r   r   get_clip!  sz    








&zVideoClips.get_clipc                 C   sn   dd | j D }dd | j D }|r6t|}| }| j }||d< ||d< |d= |d= |d= d	|d
< |S )Nc                 S   s   g | ]}t |qS r   rx   ry   r   r   r   rX   z  s     z+VideoClips.__getstate__.<locals>.<listcomp>c                 S   s   g | ]}| tjqS r   )r   r#   r   )rV   r;   r   r   r   rX     s     video_pts_sizesrY   ru   r{   r~   r   _version)rY   r#   catnumpy__dict__copy)r*   r   rY   dr   r   r   __getstate__y  s    

zVideoClips.__getstate__)r   r   c                 C   sf   d|kr|| _ d S tj|d tjd}tj||d dd}|d= ||d< || _ | | j| j| j d S )Nr   rY   rS   r   r   )r    )	r   r#   rT   r   splitrQ   rk   r   rB   )r*   r   rY   r   r   r   __setstate__  s    zVideoClips.__setstate__)r>   r   NNr   r   r   r   r   r   r   r?   )N)N)!r4   r5   r6   r7   r   r8   r9   r	   r:   r   r   r+   rO   rP   propertyrc   rl   staticmethodr#   r   r
   r   rt   rw   rQ   r0   r   r   r   rn   r   r   r   r   r   r   r   r=   I   sl               %      (Xr=   )r   ) r   ro   rq   	fractionsr   typingr   r   r   r   r   r	   r
   r   r   r#   Ztorchvision.ior   r   r   r   r[   r   r   rp   r9   r   r   r%   r&   r<   r=   r   r   r   r   <module>   s   ,