U
    zh%                    @   s|+  U d dl Z d dlZd dlZd dlZd dlmZ d dl mZmZ d dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlmZm Z m!Z! d dl"m#Z# d dl$m%Z% d d	lm&Z&m'Z'm(Z(m)Z)m*Z* d d
l+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ej5j6Z6g Z7ee8 e9d< ej:j;j<Z<G dd deZ=dXeej>e?dddZ@ee@ej>jAddZBee@ej>jAdZCee@ej>jDdZEe!eFe!dddZGe#e<jHe/deCe!e!dddZHe#e<jIe/deCe!e!dddZIe#e<jJe/deCe!e!eKeKd d!d"ZJe#e<jLe/deCe!eKeKeKe?e!d#d$d%ZLe#e<jMjNgd&d' ZOe#e<jMj!ge!d(d)d*ZPe#e<jQe/ eCe!e!d+d,d-ZQe#e<jRe/deCe!e!d.d/d0ZRe#e<jSe/de!e!eKeKd1d2d3ZSe#e<jTe/ eCe!e!d+d4d5ZTe#e<jUe/ eCe!e!e!d6d7d8ZUe#e<jVe/de!e!eKd9d:d;ZVe#e<jWe/deCe!e!eKe?d<d=d>ZWe#e<jXe/deCdYe!e!e8d@dAdBZXe#e<jYeCe!e!dCdDdEZYe#e<jZe/ eCe!e!d+dFdGZZe#e<j[e/deCe!e!e!d6dHdIZ[e#e<j\e!e!e!dJdKdLZ\e#e<j]e!e!e!ee!e!f dMdNdOZ]e#e<j^e<j^j_`e6jae/ eCdZe!e!eKeKe?eejb e!dRdSdTZ^e#e<jce<jcj_`e6jaeCd[e!e!eKeKe?eejb e!dRdUdVZce#e<jde/ eCe!e!e!eKeKe?e?e!dWdXdYZde#e<jee/deCe!e!e!e!dZd[d\Zee!eFd]d^d_Zfejgd`dadbZhe#e<jie/ eCe=jjjkfe!e!eFe!dcdddeZie#e<jle/deCe!e!e!eFdfdgdhZle#e<jme/ eCe=jjjkdife!e!eFeKdjdkdlZme#e<jnj_eCe!e!e!eFeKdmdndoZne#e<jnjoeCe!e!e!eFeKe!dpdqdrZpe#e<jqj_eCe!e!e!eFeKdsdtduZqe#e<jqjreCe!e!e!eFeKe!dvdwdxZse!e!e!ee! eFeFe!e!dydzd{Zte#e<jue/deCe!e!eFe!d|d}d~Zue#e<jve/de!e!e!ee! eFeFe!e!dyddZve#e<jwe/de!e!e!ee! eFeFe!e!dyddZwe#e<jxe/ eCde=jjjkfe!e!ee! eFe!dddZxe#e<jye/deCde=jjjkfe!e!e!ee! eFe!dddZye#e<jze/ eCe=jjjkfe!e!eFe!dddZze#e<j{e/deCe=jjjkfe!e!e!eFe!dddZ{e#e<j|e/ d\e!e!eKdddZ|e#e<j}e/ e!e!e!dddZ}e#e<j~e/ e!eeF eFeFeFeFdddZ~e#e<jj!d]e!eFeeF eeF eFdddZe#e<je/ e!eeF eFeFdddZe#e<je/ e!eeF eFeFeFdddZe!e!ejgdddZe#e<je/deBe!e!eFejgdddZe#e<je/ eBe!e!eFejgdddZdd Ze#e<je/ e!eeF eeF eeF eeF e!dddZe#e<je/ eCe!eeF eeF eeF eeF eeF e!dddZe#e<je/ e!e!eKdddZe#e<je/ e!eeF eFeFeFe!dddZe#e<jj_eCd^e!e!eeK e!dddZe#e<je<jj_`e6je<jj_`e6je!eKee? dddZe#e<je/dde!eKee? dddZe#e<je/ e!eFe?dÜddńZe#e<je/ e!eFe?dÜddǄZe#e<je/ d_e!e!eFe?e?e!dɜdd˄Ze#e<je/ e!e!eFeFe?d̜dd΄ZeeF dϜddфZee! eFeFee! dҜddԄZee! d՜ddׄZee! eFd؜ddڄZee! eFeFdۜdd݄Ze#e<jj_e<jjrgd`ee! eFeFee! e!dޜddZe#e<jdae!eeF eFee! dddZe#e<jj_e<jjrgdbe!eeF eFeee!  eee!  dddZe#e<jj!dce!eFeFee!df dddZe#e<jj_dde!eeF eFee!df dddZe#e<jj!dee!eFeFee!df dddZe<jj`e6jdfe!e!eFee!df dddZe#e<je/ eCdge!e!e!eFeFdddZe#e<je/ eCdhe!e!e!eFeFe?dddZe#e<je/ eCdie!e!e!eFeFdddZe#e<jj_eCe!e!e!e!ee! eFeFeFeFee? eee! ee! ee! f dddZe#e<jjre!e!e!e!ee! eFeFeFeFee? ej!ej!ej!eee! ee! ee! f d ddZee! ee! dddZe#e<jj_e!e!eeF e!e!ee! ee! ee? eee! ee! ee! f d	ddZe#e<jjre!e!eeF e!e!ee! ee! ee? ej!ej!ej!eee! ee! ee! f d	d
dZe!ee! ee! ee! ee! e?eKeKe?ee!e!e!ee! ee! f d
ddZe#e<je/ddde!ee! ee! ee! ee! e?eKeKee!e!e!f d	ddZe<jj_`e6je<jj_`e6je!ee! ee! ee! ee! e?eKeKee!e!e!f d	ddZe<jj_`e6jdjee! dddZe#e<jj_e!ee! ee! e!e!eKeKee!e!e!f dddZe#e<jj_e!ee! ee! e!e!e?eKeKee!e!e!f d	ddZe#e<jje!ee! ee! e?eKeKee!e!e!f dd d!Ze#e<jj_e!ee! ee! e!e!e?eKeKee!e!e!e!e!f d	d"d#Ze!ee! ee! e!e!eKe?e!d$d%d&Ze#e<jj_e!ee! ee! e!e!eKeKee!e!e!e!f dd'd(Ze#e<jj_e!ee! ee! e!e!eKeKee!e!e!e!e!e!f dd)d*Ze#e<jj_e!ee! ee! e!e!eKeKee!e!e!e!f dd+d,Ze#e<je/ddeCdkd-d.Ze#e<je/ ddddddd/e!eejg eej e?e?eej d0d1d2Ze#e<je<je<jge/ d3d4 Ze<jj_`e6je#e<je/ddd5d6e!e!ee! ee! ee! e?eKeKd7d8d9Zd:d; Ze#e<jj_e!e!ee! ee! ee! ee! ee! e?eKee? e!ee!ee! ee! f d<d=d>Ze#e<jj_e!e!ee! ee! ee! ee! ee! e?eKee? ee!ee! ee! f d?d@dAZe#e<jjre!e!ee! ee! ee! ee! ee! e?eKee? ej!ej!ej!ee!ee! ee! f dBdCdDZe#e<jƃe/ddd5e!e!e!ee! ee! ee! ee! eKdEdFdGZe#e<jǃe/ddd5e!e!e!ee! ee! ee! ee! eKe!dH	dIdJZe#e<jȃe/ eCe!eeFeFf dKdLdMZe#e<jʃddNe)eFe)e)e'dOdPdQZe#e<j˃e/ ddNe)eFe)e)e'dOdRdSZddNe)eFe)e)e?e'dTdUdVZe#e<jj_e<jj_`e6jdldXdYZe#e<j΃e)eFe)e)dZd[d\Ze#e<jσe/ e)eFe)e)dZd]d^Ze)eFe)e)e?d_d`daZe#e<jуe/dbdceCe!ee!e!f d+dddeZe#e<j҃e/ dme!ee?eFeKf ee?eFeKf eejb dfdgdhZe#e<jӃdndidjZӐdkdl ZԐdmdn Ze#e<jj׃e#e<jj׃e#e<jj׃e<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je!eeeF  eeeK  e!dodpdqZe#e<jj׃e#e<jj׃e#e<jj׃e<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je!eeeF  eeeK  e!dodrdsZސdodtduZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/dddvdpe!eeF eeK e!dwdxdyZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/dddvdqe!eeF eeK e!dwdzd{Ze#e<jj_e<jjrge<jj_`e6je<jj_`e6je/dddvdre!eeF eeK eeK e!d|d}d~Ze#e<jj_e<jjrge<jj_`e6je<jj_`e6je/dddvdse!eeF eeK eeK e!d|ddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/dddvdte!eeF eeK eeK eeK e!dddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/dddvdue!eeF eeK eeK eeK e!dddZeCdve!eeF eeeK  e?e!dddZdd Zdd Zdd Zdd ZdwddZdd Zdd ZdxddZdyddZdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Zdd ZdzddZd{ddZdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Zdd Zdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<j j׃e<j jנ`e6je<j jנ`e6jdd Ze#e<jj׃e<jjנ`e6je<jjנ`e6jdd Ze#e<jj׃e#e<jj׃e<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6jdd Ze#e<jj_e<jjrge/ d|e!eeF e?eeK e!dddZe#e<jj_e<jjrge<jj_`e6je/ d}e!eeF e?eeK eeK e!ddÐdĄZe#e<jj_e<jjrge/ d~e!eeF e?eeK eeK eeK e!dŜdƐdǄZddȐdɄZdʐd˄ Z	ee! ee! e!e!d̜d͐d΄Z
e*e!dϜdАdфZeCe!eeF e?eeeK  e!dҜdӐdԄZe#e<jj_e!e!e?d՜d֐dׄZe#e<je<jge/ dؐdل Ze#e<jgdڐdۄ Ze!e!ee! eFeFee!e!f dܜdݐdބZe#e<je/dbd߃e!e!ee! eFeFee!e!f dܜddZe#e<je/dbd߃e!e!ee! eFeFee!e!f dܜddZe!eKe!dddZe!eKe!dddZe!e*dddZe*e!e!dddZee! e!dddZeFe?ejgejdddZe!eFeFe?dddZe!eFeFeFe?dddZe!eeF e?dddZe!eeF e?dddZe#e<je/ eCe!eeF e?dd dZde!e!eFeFe?e?e!dddZ e#e<j!e/ eCde!e!eFeFe?e!dddZ!e#e<j"e/ eCdd	 Z"e#e<j#e/ dde=jjjkfd
dZ#ej!ej!e?e?dddZ$e<j%j_`e6je<j%jr`e6je/ddddddZ%e#e<j&j_e<j&jrge<j&j_`e6je/ eCde!eeFeFf e?eeK eeK e!dddZ'e#e<j&j׃e<j&jנ`e6je<j&jנ`e6je/ eCde!eeeFeFf  e?eeeKeKf  e!dddZ(e#e<j)e#e<j*e#e<j+eCe/ e!eeFdf e!dddZ,e#e<j-e#e<j.e#e<j/eCe/ e!eeFdf e!dddZ0e!eeFdf eeFeFeFge!f e!ddd Z1e#e<j2e/d!d"ddd#d$d%Z2e#e<j3e/ ddd`d&d'Z3e#e<j4j_e<j4jrge/ dej5ddd(e'eejg ej6eej e?d)d*d+Z7e#e<j4j8gdej5ddd(e'e'eejg ej6eej e?d,d-d.Z9e#e%d/d0 Z:e#e<j;e<j;j_`e6je/ ddde=jjjkfe!e!e'e'ee! eFe!d1d2d3Z;e#e<j<e<j<j_`e6je/dbd4e!e!eFee!e!f dd5d6Z<e#e<j=j_dddd7e!e!e!eKe?ee! eeK ee!e!f d8d9d:Z>d;d< Z?e#e<j@ge/ eCdd=d>Z@e#e<jAe/ d?d@ ZAe#e<jBdAdB ZBe#e<jCj_e<jCjrgdddCe!eejg ee! e!dDdEdFZDe#e<jEj_e<jEjFgde!eeF dGdHdIZGe#ej;j<jHddJdKZHe#e<jIe/ dddLdMdNZIddOdPdQZJdddLdRdSZKe#e<jLe/ dTdU ZLe#e<jMddVdWZMe?e<jNe<jO e?e<jPe<j e?e<jQe<j e?e<jRe<j@ e?e<jSe<jM e?e<jTe<jU e?e<jVe<jT e?e<jWe<jX e?e<jYe<jQ e?e<jZe<j[ e?e<j\e<j] e?e<j^e<j_ e?e<j`e<ja e?e<jbe<jc e?e<jde<je e?e<jfe<jg e?e<jhe<ji e?e<jje<jk e?e<jle<jm e?e<jne<jo e?e<jpe<jq e?e<jre<js e?e<jte<ju e?e<jve<jw e?e<jxe<jZ dS (      N)Enum)partialreduce)chainproduct)AnyCallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r*   r*   N/var/www/html/venv/lib/python3.8/site-packages/torch/_decomp/decompositions.pyr!   +   s   r!   F)ftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sn   dd t j| |D }tj|di\  fdd}fdd}t|| t||}r`|S t||S d S )Nc                 S   s   g | ]}t |tr|qS r*   )
isinstancer   .0xr*   r*   r+   
<listcomp>;   s    
 z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                    s   t | tr|  S | S d S Nr/   r   tor2   computation_dtyper*   r+   increase_precC   s    

z0type_casts.<locals>.inner.<locals>.increase_precc                    s   t | tr|  S | S d S r5   r6   r8   )result_dtyper*   r+   decrease_precI   s    

z0type_casts.<locals>.inner.<locals>.decrease_prec)pytreeZarg_tree_leavesutilselementwise_dtypesr   )argskwargsZ	flat_argsr;   r=   rr.   r,   r-   )r:   r<   r+   inner9   s    

ztype_casts.<locals>.inner)	functoolswraps)r,   r-   r.   rE   r*   rD   r+   
type_casts4   s    rH   T)r-   r.   )r-   )r2   dimreturnc                 C   s$   t ||   D ]}| d} q| S N)rangerI   	unsqueeze)r2   rI   _r*   r*   r+   _unsqueeze_to_dimf   s    rP   
grad_inputout_gradyc                 C   s   | d||     S Nr"   Zconj_physicalrR   r*   r*   r+   tanh_backwardl   s    rW   c                 C   s   | |d|     S rU   rV   rR   r*   r*   r+   sigmoid_backwards   s    rX   )rS   r2   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rS   r2   rY   rZ   zr*   r*   r+   softplus_backwardz   s    ra   )grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sf   || }|}|}|r6t |dk| | ||  | | S t |dk| | | t ||  | | S d S Nr   )r^   r_   r]   )	rb   rc   rd   re   rf   rg   ZnegcoefZposcoefZ
negiptcoefr*   r*   r+   elu_backward   s    ri   c                 C   s   t | |S r5   )r^   Z	full_likeselfvaluer*   r*   r+   fill_scalar   s    rm   rl   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrI   r*   rn   r*   r+   <lambda>       zfill_tensor.<locals>.<lambda>)r^   _checkrI   atencopyrj   r*   rn   r+   fill_tensor   s
    

ru   )rk   rJ   c                 C   s    t jt j| d ddddd S N   r   min   maxr^   clamprk   r*   r*   r+   hardsigmoid   s    r   rb   rk   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r^   r_   r   r*   r*   r+   hardsigmoid_backward   s
    r   rb   rk   Zmin_valZmax_valc                 C   s   t ||k||kB d| S )Nr   r   r   r*   r*   r+   hardtanh_backward   s    r   c                 C   s$   | t jt j| d dddd d S rv   r}   r   r*   r*   r+   	hardswish   s    r   )rb   rk   rJ   c              
   C   s,   t |dk dt |dk| |d d  | S )Nr   rw         ?r   r   r*   r*   r+   hardswish_backward   s
    r   rb   rk   rZ   c                 C   s   t ||kd| S rh   r   r   r*   r*   r+   threshold_backward   s    r   rb   rk   negative_slopeself_is_resultc                 C   s   t |dk| | | S rh   r   r   r*   r*   r+   leaky_relu_backward   s    r   none)gradrk   approximatec                 C   s   d}d}d}|dkr|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S d S )
Ng;f?g;f?gmBP?tanhr   gHm?r"   rw   g      )r^   r   erfr]   )r   rk   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuberE   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr*   r*   r+   gelu_backward   s,    
r   )rb   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rU   )r^   r   FZsoftplussigmoid)rb   r   Zinput_tanh_softplusZinput_sigmoidoutr*   r*   r+   mish_backward  s    
r   c                 C   s   | t |  S r5   )r^   r   r   r*   r*   r+   silu  s    r   c                 C   s,   ddt |   }| | d|d|    S rU   )r^   r]   )rb   rk   r   r*   r*   r+   silu_backward  s    r   )rk   weightrJ   c                 C   s   t | dk| ||  S rh   r   )rk   r   r*   r*   r+   _prelu_kernel   s    r   )rb   rk   r   rJ   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rb   rk   r   Z
input_gradZweight_gradr*   r*   r+   _prelu_kernel_backward%  s    r         ?UUUUUU?)rk   noiseloweruppertraining	generatorrJ   c           
      C   sl   |d kst |rP| dk}t| ||}t|| | | }|t||d |S || d }	t| |	S d S )Nr   r"   r#   )AssertionErrorrs   uniformr^   r_   copy_
leaky_relu)
rk   r   r   r   r   r   Znot_positiverC   outputr   r*   r*   r+   rrelu_with_noise0  s    r   c              	   C   s   |  t| |||||S r5   )r   r   )rk   r   r   r   r   r   r*   r*   r+   rrelu_with_noise_H  s    r   )rb   rk   r   r   r   r   r   rJ   c                 C   s:   |r|| dkr|  |S || d }t| |||S d S )Ngư>r#   )mulrs   r   )rb   rk   r   r   r   r   r   r   r*   r*   r+   rrelu_with_noise_backwardV  s    
   r   )rb   rk   bufferrJ   c                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r"   rL   )r^   r_   r]   abs)rb   rk   r   Zin_negativeZ	max_derivsignr`   r*   r*   r+   log_sigmoid_backwardk  s
    r   loss	reductionc                 C   s4   |t jjkrt| S |t jjkr,t| S | S d S r5   )r!   r(   rl   r^   meanr)   sumr   r*   r*   r+   apply_loss_reductionx  s
    

r   dtypec                 C   s4   | t jkrt jS | t jkr t jS | t jkr0t jS d S r5   )r^   Z	complex32Zfloat16Z	complex64float32Z
complex128Zfloat64r   r*   r*   r+   to_real_dtype  s    


r   )rk   targetr   rJ   c                 C   s   | | d }t ||S )Nr#   )r   )rk   r   r   r   r*   r*   r+   mse_loss  s    r   )rb   r   r   r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r!   r(   rl   numel)rb   r   r   r   normr*   r*   r+   mse_loss_backward  s    r   r\   )rk   r   r   rY   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r#   )r   r^   r_   r   )rk   r   r   rY   r   r*   r*   r+   smooth_l1_loss  s    	&r   )rb   rk   r   r   rY   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S r[   )r!   r(   rl   r   r^   r   r_   r   )	rb   rk   r   r   rY   r   r2   Zabs_xZ	norm_gradr*   r*   r+   smooth_l1_loss_backward  s    

r   )rb   rk   r   r   rY   rQ   c                 C   s*   t | ||||}t||j t||ddS NTZ	copy_fromZcopy_toexact_dtype)r   r   shaper   )rb   rk   r   r   rY   rQ   resultr*   r*   r+   smooth_l1_loss_backward_out  s    
r   )rb   rk   r   r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S r[   )r!   r(   rl   r   r^   r_   )rb   rk   r   r   r   r   r2   r*   r*   r+   huber_loss_backward  s     r   )rb   rk   r   r   r   rQ   c                 C   s*   t | ||||}t||j t||ddS r   )r   r   r   r   )rb   rk   r   r   r   rQ   r   r*   r*   r+   huber_loss_backward_out  s    
r   )rb   rk   r   r   r   ignore_indextotal_weightrJ   c                 C   s   |  dk rdnd}|tjjkr(| | } ||}t||k|d}t|}	t|	||d}	|	  |     krzdkrn n
| |} |d k	rdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr#   r   r"   g      c                 S   s   g | ]}d qS r"   r*   r1   rO   r*   r*   r+   r3     s     z&_nll_loss_backward.<locals>.<listcomp>)rI   r!   r(   rl   rN   r^   r_   
zeros_likescatterrM   r   reshape)rb   rk   r   r   r   r   r   channel_dimsafe_targetrQ   Z	new_shaper*   r*   r+   _nll_loss_backward  s     	

 

r   )rb   rk   rI   rJ   c           
      C   s   |  dkstdt|  |}||}|d dksNtd| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr#   z.Halving dimension must be even, but dimension z	 is size r\   ro   )	rI   r   r?   canonicalize_dimsizenarrowr^   r   cat)
rb   rk   rI   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr*   r*   r+   glu_backward  s    


r   c                 C   sx  d|    krdks"n td|  dks6td|  dkoL|  dk}|s|jd |jd kstd|j d|j d| dkstd	|j d
|  df|d ks| |jd kstd|tjjkr8|  dkr8|   dkr| jd |jd ksdtd|jd  d|    d| jd  n,|   dkrT|  dksdtd| j t| ||||||S )Nr   r#   input tensor should be 1D or 2Dr"   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rL   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rI   r   r   r   r!   r'   rl   r   )rb   rk   r   r   r   r   r   no_batch_dimr*   r*   r+   nll_loss_backward$  sP    "
&$


      r   c                 C   s   |  dkstd|   |  dks<td|   |jd |jd krx|jd |jd krx|jd |jd kstd|j d	|j | dkstd
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: rw   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r#   r"   r   r   r   z ( z, elements))rI   r   r   r   r   )rb   rk   r   r   r   r   r   r*   r*   r+   nll_loss2d_backwardP  s8    

      r   )rk   r   r   r   rJ   c              	   C   s\   |d t t |  | dd |t t | | dd  }|d k	rR|| }t||S )Nr"   r*   i)r^   maximumlog1pnew_fulllogr   )rk   r   r   r   r   r*   r*   r+   binary_cross_entropys  s    

 
r   )rb   rk   r   r   r   rJ   c                 C   sR   d}| ||  t j|d|  |d }|d k	r6|| }|tjjkrN||  }|S )Ng-q=r"   rx   )r^   r~   r!   r(   rl   r   )rb   rk   r   r   r   ZEPSILONr   r*   r*   r+   binary_cross_entropy_backward  s    
"r   )r   r   r   rJ   c                 C   s    t t |  | }t||S r5   )r^   r   r]   r   )r   r   r   r   r*   r*   r+   soft_margin_loss  s    r   )rb   rk   r   r   rJ   c                 C   s6   ||  t || d  }|tjjkr2||  }|S rU   )r^   r   r!   r(   rl   r   )rb   rk   r   r   rQ   r*   r*   r+   soft_margin_loss_backward  s    	r   r#   r   otherpc                 C   s   t j| | |dS )N)r   )rs   r   r   r*   r*   r+   dist  s    r   )x1x2rJ   c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr#   rL   Tmemory_formatr   )powr   r^   	ones_likecontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r*   r*   r+   _euclidean_dist  s    r  )rb   input_sizesrI   startendstepc                 C   s   |  |}t|| ||||S r5   )	new_zerosr^   Zslice_scatter)rb   r  rI   r  r  r  rQ   r*   r*   r+   slice_backward  s    

r	  r"   )rk   rI   r  r  r  c                 C   sB  |   }|dkrtdt|   |}t|  }t|  }|dkrPtd|d k	r\|nd}|d k	rl|ntj}	|dk r||| 7 }|	dk r|	|| 7 }	|dk rd}n||| kr|| }|	|k r|}	n|	|| kr|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
r0tdn| |||
S d S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver"   z<Slice decomposition for quantized tensors aren't implemented)rI   RuntimeErrorr?   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rk   rI   r  r  r  ndimsizesstridesZ	start_valZend_valr  lenr*   r*   r+   slice_forward  s>    	r  )rb   r  rI   indexc                 C   s   |  |}t|| ||S r5   )r  r^   select_scatter)rb   r  rI   r  rQ   r*   r*   r+   select_backward  s    
r  )rb   r  offsetdim1dim2c                 C   s   |  |}t|| |||S r5   )r  r^   Zdiagonal_scatter)rb   r  r  r  r  rQ   r*   r*   r+   diagonal_backward  s    
r  rb   rQ   input_dtypec                 C   s   | j |kr||}|S r5   )r   r7   r  r*   r*   r+   _cast_grad_to_input_dtype  s    

r  )rb   r   rI   r  c                 C   s0   | | }||t j||dd  }t| || S NTrI   keepdim)r^   r   r  
contiguous)rb   r   rI   r  Znew_grad_outputrQ   r*   r*   r+   _softmax_backward_data#  s      
r$  c                 C   s*   | t |t j| |dd  }t| ||S r   )r^   r]   r   r  )rb   r   rI   r  rQ   r*   r*   r+   _log_softmax_backward_data5  s      
r%  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr#   r"   r   devicer   rL   )r   r^   arangeint64rN   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr'  Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr*   r*   r+    _im2col_col2im_indices_along_dimA  s
    r*  )r   kernel_sizedilationpaddingr  rJ   c              	      s(  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dkotdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s@| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s$|d}|S ) Nr#   c                   S   s   dS )Nz"im2col(): only 2D kernel supportedr*   r*   r*   r*   r+   rp   ]  rq   zim2col.<locals>.<lambda>c                   S   s   dS )Nz$im2col(): only 2D dilation supportedr*   r*   r*   r*   r+   rp   ^  rq   c                   S   s   dS )Nz#im2col(): only 2D padding supportedr*   r*   r*   r*   r+   rp   _  rq   c                   S   s   dS )Nz"im2col(): only 2D stride supportedr*   r*   r*   r*   r+   rp   `  rq   Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r   Nr*   r1   r   r*   r*   r+   	<genexpr>c  s     z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS r.  r*   r/  r*   r*   r+   r0  c  s     c                   S   s   dS )Nz<{param_name} should be greater {'than' zero, but got {param}r*   r*   r*   r*   r+   rp   e  rq   z0im2col.<locals>.check_positive.<locals>.<lambda>allr^   rr   param
param_namestrictcondr*   r*   r+   check_positiveb  s
    ( zim2col.<locals>.check_positiver+  r,  r-  Fr6  r  rw   r   c                 s   s   | ]}|d kV  qdS r.  r*   r1   dr*   r*   r+   r0  p  s     zim2col.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler*   r   r*   r+   rp   q  rq   c                 s   s<   | ]4\}}}}}d |d|  ||d    d  |  V  qdS )r"   r#   Nr*   r1   r   padZdilZkerstr*   r*   r+   r0  t  s   r   c                 s   s   | ]}|d kV  qdS r.  r*   )r1   cr*   r*   r+   r0  {  s     c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r=  r*   r,  r+  output_sizer-  r   r  r*   r+   rp   |  rq   r   r   rL   r"   rw      )T)r^   rr   r  r   r2  r>  ziprN   r*  r'  r   rA  permuter   r   squeeze)r   r+  r,  r-  r  r8  r  batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr   Znum_blocks_rowZnum_blocks_colr*   rH  r+   im2colT  s    	



 

    
          

 
 
rW  )r   rI  r+  r,  r-  r  rJ   c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dkotdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
s|d}|S )%Nr#   c                   S   s   dS )Nzonly 2D output_size supportedr*   r*   r*   r*   r+   rp     rq   zcol2im.<locals>.<lambda>c                   S   s   dS )Nzonly 2D kernel supportedr*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nzonly 2D dilation supportedr*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nzonly 2D padding supportedr*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nzonly 2D stride supportedr*   r*   r*   r*   r+   rp     rq   Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r.  r*   r/  r*   r*   r+   r0    s     z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS r.  r*   r/  r*   r*   r+   r0    s     c                   S   s   dS )Nz9{param_name} should be greater than zero, but got {param}r*   r*   r*   r*   r+   rp     rq   z0col2im.<locals>.check_positive.<locals>.<lambda>r1  r3  r*   r*   r+   r8    s
    ( zcol2im.<locals>.check_positiver+  r,  r-  Fr9  r  rI  )r#   rw   c                 s   s   | ]}|d kV  qdS r.  r*   r;  r*   r*   r+   r0    s     zcol2im.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r=  r*   r?  r*   r+   rp     rq   r   r"   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r*   r*   )r+  r   r*   r+   rp     rq   c                 S   s:   g | ]2\}}}}}d |d|  ||d    d  |  qS r"   r#   r*   r@  r*   r*   r+   r3     s   zcol2im.<locals>.<listcomp>rL   c                      s4   d d d d d d  dd  d	S 
NzGiven output_size=rD  rE  rF  rG  z , expected input.size(-1) to be 	 but got rL   .r*   r*   Lr,  r+  rI  r-  r   r  r*   r+   rp     rq   c                      s4   d d d d d d  dd  d	S rY  r*   r*   r\  r*   r+   rp     rq   rw   r   rJ  c                 S   s   g | ]\}}|d |  qS )r#   r*   )r1   or   r*   r*   r+   r3     s     
accumulate)T)r^   rr   r  r   r2  rK  rN   r   rL  r*  r'  rP   r  prodrs   _unsafe_index_putr   rA  rM  )r   rI  r+  r,  r-  r  r8  r  Zprod_kernel_sizecolrN  Zout_hZout_wrO  rP  rQ  rR  rS  rT  rU  rV  Zindices_rowZindices_colZoutput_padded_sizer   idxr*   r\  r+   col2im  s    




 
    

"     
     
re  )rb   maskrd   c                 C   s$   | | | |  jt| d}|S Nr   )type_ascloner?   r   )rb   rf  rd   rC   r*   r*   r+   native_dropout_backward	  s    	rj  )r   
input_size	dimensionr   r  rJ   c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   r'  r   rL   r"   r5   Tr_  )r  r^   Zsqueeze_copyr?   r   r(  r'  int32ZunfoldflattenZmovedimr  rs   rb  r#  )	r   rk  rl  r   r  rI   rd  rQ   r  r*   r*   r+   unfold_backward  s    
rp  )rb   rk   epsrJ   c              	   C   sx   |d k	r>|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS d S )Nr\   r   r*   nan)r^   r_   logical_andr   float)rb   rk   rq  lohir*   r*   r+   logit_backward+  s    rw  r   r   trainc                 C   s*   |r|dkrt | ||d S |  S d S rh   )rs   native_dropoutri  rx  r*   r*   r+   dropout@  s    r{  out0out1c                 C   s   |rl|dkrl|dkr.t | t j| t jdfS | jjs>tdt | |k}||  tdd|   }||fS | t j| t jdfS d S )Nr   r"   r   z?result type Float can't be cast to the desired output type Longr\   )	r^   r   boolr   is_floating_pointr
  	rand_likert  r   )r   r   ry  Z	bool_maskresr*   r*   r+   rz  J  s    rz  )r2   rI   half_to_floatc                 C   s   |   } |r| jtjksttj| tjjd\}}| 	|} | 
 dkrTt| }ntj| |dd}t| | }|tj||dd }|s|	|}|S Nr4   r   Tr"  )r#  r   r^   halfr   r?   r@   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   r]   amaxr   )r2   rI   r  r:   r<   Zunnormalizedx_maxr   r*   r*   r+   _softmax[  s      


r  c           	      C   s   |   } |r| jtjksttj| tjjd\}}| 	|} | 
 dkrN| }ntj| |dd}| | }ttjt||dd}|| }|s|	|}|S r  )r#  r   r^   r  r   r?   r@   r  r  r7   r   r  r   r   r]   )	r2   rI   r  r:   r<   Zshiftedr  Zshifted_logsumexpr   r*   r*   r+   _log_softmaxr  s"     


r  rL   )r   indicespadding_idxscale_grad_by_freqsparserJ   c                 C   sN   |   dkstd|jdkrB| d|}|jdkr>|d}|S | | S d S )Nr#   z'weight' must be 2-Dr"   r   )rI   r   r  Zindex_selectrM  )r   r  r  r  r  r   r*   r*   r+   	embedding  s    	


r  )rb   r  num_weightsr  r  c                 C   s   t j| t jjd\}}| |} t|tj}|rp||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tr_  rL   r   )r?   r@   r  r  r7   r   r^   longr  r   rs   rb  rN   rP   r  masked_fillr   )rb   r  r  r  r  r:   r<   countsonesZgrad_weights_scalerf  r   grad_weightr*   r*   r+   embedding_dense_backward  s(    	 


r  r8   c                 C   s   d}| D ]}||9 }q|S rU   r*   )r2   rC   ir*   r*   r+   ra    s    
ra  )tensorsrI   
num_chunksrJ   c           	      C   s   g }| D ]}|  }|| | d | | }||| krndgd |j| d  d|||  g }t||d}|d | t|dg }||| q|S )Nr"   r   r#   rL   )r   r  rs   constant_pad_ndr^   Sizeappendview)	r  rI   r  padded_tensorstensortensor_sizeZpad_along_dimrA  Z	view_sizer*   r*   r+   
_pad_chunk  s    
r  )r  c                 C   s(   | d j }| D ]}|j |kr dS qdS )Nr   FTr  )r  r  r  r*   r*   r+   have_same_ndims  s
    

r  )r  rI   c                 C   sB   | d   d | }| D ]$}t|  d | |kdd  qd S )Nr   c                   S   s   dS )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr*   r*   r*   r*   r+   rp     rq   z+leading_dimension_matches.<locals>.<lambda>)r   r^   rr   )r  rI   Zleading_dim_sizesr  r*   r*   r+   leading_dimension_matches  s    r  )r  rI   r  c                 C   s   t |dkdd  t t| dkdd  | d j}| d j}| D ]H}t | dkdd  t |j|kdd  t |j|kdd  qDt| rt| d 	 |}n4t |dkd	d  | D ]}t ||j
k d
d  qt| | |S )Nr"   c                   S   s   dS )Nz&_chunk_cat expects positive num_chunksr*   r*   r*   r*   r+   rp     rq   z._preprocess_chunk_cat_inputs.<locals>.<lambda>r   c                   S   s   dS )Nz0_chunk_cat expects a non-empty input tensor listr*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nz#_chunk_cat expects non-empty tensorr*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nz8_chunk_cat expects all input tensors with the same dtyper*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nz8_chunk_cat expects all inputs tensors on the same devicer*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr*   r*   r*   r*   r+   rp     rq   c                   S   s   dS )Nz3_chunk_cat expects dim < ndim for all input tensorsr*   r*   r*   r*   r+   rp   
  rq   )r^   rr   r  r   r'  r   r  r?   r   rI   r  r  )r  rI   r  Zexpected_dtypeZexpected_devicer  r*   r*   r+   _preprocess_chunk_cat_inputs  s<    
 


r  )r  rI   r  r   rJ   c                 C   sL   t | ||}t| ||}|d kr0t||d S tj||d |d |S d S )Nr"   )r   )r  r  r^   r   )r  rI   r  r   r  r*   r*   r+   
_chunk_cat  s    r  )rk   split_sizesrI   rJ   c           	         s   t tD ]}t| dd  qtttj  k fdd t}g }d}ddlm	} t |D ]>}| }||| j  k |
 || ||7 }qr|S )Nc                   S   s   dS )NzCsplit_with_sizes expects split_sizes have only non-negative entriesr*   r*   r*   r*   r+   rp   )  rq   z"split_with_sizes.<locals>.<lambda>c                      s   dt  dj   S )NzSplit sizes add up to z but got the tensor's size of )r   r   r*   rI   rk   r  r*   r+   rp   .  rq   r   )expect_true)rM   r  r^   Z_check_is_sizeZ_check_with
ValueErrorr   r   %torch.fx.experimental.symbolic_shapesr  r  r   )	rk   r  rI   r  Z
num_splitssplitsZ	start_idxr  lengthr*   r  r+   split_with_sizes   s(    
r  )rk   r  rI   r   rJ   c                 C   sZ   t | ||d}|d kr$dd |D S t||D ]"\}}t||j t||dd q.d S d S )Nro   c                 S   s   g | ]}|j tjd qS )r   )ri  r^   r   )r1   sr*   r*   r+   r3   M  s     z)split_with_sizes_copy.<locals>.<listcomp>Tr   )r  rK  r   r   r   )rk   r  rI   r   r  r   splitr*   r*   r+   split_with_sizes_copyB  s    	r  .)r   
split_sizerI   rJ   c                 C   s   t j| ||S r5   )rs   r  r   )r   r  rI   r*   r*   r+   unsafe_splitU  s    r  )r   r  rI   rJ   c                 C   s   t j| ||S r5   )rs   r  default)r   r  rI   r*   r*   r+   unsafe_split_with_sizesZ  s    r  )rk   r  rI   rJ   c                    s   | j }|| } dkr(|dks"t| fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )Nr   r"   )	guard_intc                    s   g | ]} qS r*   r*   r1   r  r  r*   r+   r3   n  s     zsplit.<locals>.<listcomp>rL   )r   r   r  r  rM   r^   r  )rk   r  rI   r  dim_sizechunksr  r  r*   r  r+   r  a  s    r  )rk   tensor_indices_or_sectionsrI   rJ   c                    s   |j jdkst|jtjks t|  t dkp: dk fdd  dkrr| }t	|t
sft| ||S dd |D }| ||S d S )Ncpur"   r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr*   r*   Z	split_dimr*   r+   rp     rq   zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>c                 S   s   g | ]}|  qS r*   )itemr  r*   r*   r+   r3     s     zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)r'  typer   r   r^   r)  rI   rr   r  r/   r   tensor_split)rk   r  rI   sectionsr  r*   r  r+   /tensor_split_tensor_indices_or_sections_py_impls  s    

r  )rk   mat1mat2rY   rc   c                 C   sH   |   s |  s t|}t|}|t|| }|dkr<|S |||   S rh   )r  
is_complexintr^   mm)rk   r  r  rY   rc   r   r*   r*   r+   addmm  s    r  )rk   r  r  rY   rc   use_geluc                 C   s<   t | ||||}|r2| jr(tj|ddS t|S t|S )Nr   )r   )r  is_cudars   gelurelu)rk   r  r  rY   rc   r  r   r*   r*   r+   _addmm_activation  s    
r  )rk   r  vecrY   rc   c                 C   sH   |   s |  s t|}t|}|t|| }|dkr<|S |||   S rh   )r  r  r  r^   mv)rk   r  r  rY   rc   r   r*   r*   r+   addmv  s    r  )rb   r   r   rstdgammaNCHxWgroupoutput_maskrJ   c
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td kp  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d rvd|
  }d k	rt|d|

d	}t|d|

d	}t|dd|
}nL||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r|	|
|	|
d  |d j
dgd
 }|	d	 r|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr*   r*   )r  r  r  r*   r+   rp     rq   z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r?  r*   )r  r  r   r*   r+   rp     rq   c                      s    d  dd k	r  nd S )NzExpect gamma to have z elements but got rL   )r   r*   )r  r  r*   r+   rp     rq   r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r*   r*   )r  r  r*   r+   rp     rq   r#   ro   r\   rL   r"   r'  r   )r?   Zcheck_same_deviceZcheck_same_shaper^   rr   r   r   divmodr   r  r   rN   r   r  r'  rP   r7   r   )rb   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biasr  Zds_valZdb_valc1c2c3r*   )r  r  r  r  r  r   r+   native_group_norm_backward  s         

""



$
r  )rb   r   r   r  r  r  r  r  r  r  r|  r}  out2rJ   c
                C   sd   t | |||||||||	
}|
||f}t|D ]2\}}|d k	r,t|| |j t||| dd q,|S r   )r  	enumerater   r   r   )rb   r   r   r  r  r  r  r  r  r  r|  r}  r  r   rQ   r  rC   r*   r*   r+   native_group_norm_backward_out$  s$             
r  )r2   rJ   c                 C   s   | d k	r|  |S | S r5   r7   )r2   r   r*   r*   r+   _maybe_castA  s    
r  )	grad_outr   normalized_shaper   r  r   biasr  rJ   c           !         sX  |j }| }	t|j  fdd| |||fD \}
}}}|
d k	sHt|	t| }||d  }|d | }g }g }t|	D ]"}||kr|| q||| q|t	|}t	|}|dks|dkr|d r|
|nd |d r|
||d  nd |d r|
||d  nd fS t|| }t|| }|| | }|d k	rP|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d }d } |d r|| | }|d r|d k	rt|dkrt|
| |d}n|
| }|d r6|d k	r6t|dkr.t|
|d} n|
 } t||jt||jt| |jfS )Nc                 3   s(   | ] }|d k	r|   n|V  qd S r5   )r7   r#  r0   r9   r*   r+   r0  V  s   z-native_layer_norm_backward.<locals>.<genexpr>r   r"   r#   TF)r   rI   r?   get_computation_dtyper   r   r  rM   r  ra  r  rP   r^   r   r   ri  r  )!r  r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr  r  MZx_hatZ
grad_x_hatabr  r  r  rE   r  Zd_weightr  r*   r9   r+   native_layer_norm_backwardH  sh    







r  )r  r   r  r   r  r   r  r  r|  r}  r  rJ   c             	   C   s`   t | |||||||}||	|
f}t|D ]2\}}|d k	r(t|| |j t||| dd q(|S r   )r  r  r   r   r   )r  r   r  r   r  r   r  r  r|  r}  r  r   rQ   r  rC   r*   r*   r+   native_layer_norm_backward_out  s            
r  )
r   r   r  running_meanrunning_varr   momentumrq  
functionalrJ   c	                 C   sb  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d k	r|| d| |  }|s|| |d k	r|  | jd  }t
||	}|||d   }|| d| |  }|s|| n|d k	r.|d k	s2t|j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n| d	}| d	}t||  d }t||  d }| | | }|d k	r| }t||  d }|| }|d k	r | }t||  d }|| }| jjdkrJ|j| jd}|j| jd}|j| jd||||fS )
Nr   r#   r   T)rI   Z
correctionr"  r"   )r   rt   r  r   )r  rM   rI   r?   r  r   r7   r^   Zvar_meanrsqrtrM  r   r   r   r   r  r'  r  r  rP   ro  )r   r   r  r  r  r   r  rq  r  Zreduction_dimsr:   new_running_meannew_running_varZ	input_accZ
biased_varr   r  r   	save_mean	save_rstdnZsqueezed_varZunbiased_varinvstdr*   r*   r+   native_batch_norm_helper  sx       






r  r   r  save_invstd)	r   r   r  r  r  r   r  rq  rJ   c              
   C   s,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  rq  r   r  r   rO   r*   r*   r+   native_batch_norm  s            r  c              
   C   sz   |d kr$|d kr$t | |||||S |d kr4td|d krDtd|r`t | |||||||S t | ||||||S d S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)rs   _native_batch_norm_legitr
  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  rq  r*   r*   r+   native_batch_norm_decomposition   sJ                      r  )rJ   c                    s|   |  |}|| d |   dkrh|dkrh fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr"   r   c                    s   g | ]} qS r*   r*   r   r  r*   r+   r3   K  s     z(unsafe_chunk_py_impl.<locals>.<listcomp>)r   r^   opsrs   r  r  r  r   )r  r  rI   r  r  r*   r  r+   unsafe_chunk_py_implE  s    
r  )r   r   r  r  r  r  rq  rJ   c              
   C   s   t j| ||||d||S r  )rs   r	  r  )r   r   r  r  r  r  rq  r*   r*   r+   r
  Q  s    
r
  c              
   C   s,   t | |||||||d	\}}	}
}}||	|
fS r  r  r  r*   r*   r+   r	  g  s            r	  )r   r   r  r   r  rq  rJ   c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r  rq  r   r  r   rO   r*   r*   r+   !_native_batch_norm_legit_no_statsx  s    	        r  c              
   C   sP   t | |||||||d	\}}	}
}}|d k	s2td|d k	sBtd||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be None)r  r   )r   r   r  r  r  r   r  rq  r   r  r   r  r  r*   r*   r+   #_native_batch_norm_legit_functional  s(            r  )r   r   r  r  r  rq  r   rJ   c           	   	   C   sP   t j| ||||d|}d}|t jjjkr8t j| |}t j|t j| j| j	dS )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutr'  )
r^   _CZ_select_batch_norm_backendZ_BatchNormBackendZCudnnZ(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r  r'  )	r   r   r  r  r  rq  r   backendZreserve_sizer*   r*   r+   _get_batch_norm_reserve_tensor  s$             r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NTFr   r  r  r   r   r  r  r  r  rq  r   r  r   rO   reserver*   r*   r+   _batch_norm_with_update  s*    
      r  c              
   C   sh   t | ||||d||d	\}}}	}
}t| |||||dd}|
d k	sHtd|d k	sXtd|||	||
|fS )NTr  r  r  )r  r  r   )r   r   r  r  r  r  rq  r   r  r   Znew_rmZnew_rvr  r*   r*   r+   "_batch_norm_with_update_functional  s:                  r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NFr  r  r  r*   r*   r+   _batch_norm_no_update  s*    
      r  c                 C   sB   |d kst t| |k jtjd}|| |  d|  }||fS )Nr   r\   )r   r^   r  r7   r  rh  )r   r   r   rf  r  r*   r*   r+   _fused_dropout_decomposition  s    r  )r   r  r'  
pin_memorynon_blockingr   )r2   r   r'  r   r!  r   c                C   s   |r|t jkstd|r"td|d krB|d krB|d krB|  S d}|d k	r|| jkr|d k	r||jdkr|t j| |} d}t j| |} |d k	r|st j| |} d}|d k	rt j| |dS | S )NTODOFr  Tr   )	r^   stridedr   ri  r'  r  _primsZconvert_element_typeZ
device_put)r2   r   r  r'  r   r!  r   Zdtype_convertedr*   r*   r+   _to_copy  s     r%  c                 C   s
   t | S r5   )rs   aliasr8   r*   r*   r+   nop_decomposition;  s    r'  r  Zout3)r   r   r  r  r  r   exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r:||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )rs   r  r  r^   r  )r   r   r  r  r  r   r(  r)  r  r  rC  r*   r*   r+   cudnn_batch_normC  s"    
r*  c                 C   s@   t |D ]2\}}|dkr|| jk r0| j| |ks| |} q| S rU   )r  r  r   rN   )r2   broadcast_maskr  rf  r*   r*   r+   _broadcast_batch_norm_backwarde  s     r,  )r  r   r   r  r  r  r  ry  rq  r  r  rJ   c                 C   s   t | |||||||||	
S r5   )native_batch_norm_backward)r  r   r   r  r  r  r  ry  rq  r  r  r*   r*   r+   batch_norm_backwardl  s    r.  )r  r   r   r  r  r  r  ry  rq  r  rJ   c
           &         s  |j }
|d k	r|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dksrtdd}tt|||  }|}|}|r|d k	r|d k	stn&|d k	r|d k	st|}t	|| }dg| }|| ||< g }t
|D ]}||kr|| qt||}d| }t||}t|||  |}t|| |}tt|| || |} |d krt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s$   | ]}|d k	r|  n|V  qd S r5   r  r0   r9   r*   r+   r0    s   z-native_batch_norm_backward.<locals>.<genexpr>r#   z$rank of the input must be at least 2r"   r\   )r   r?   r  r   rI   r   ra  r  r^   r  rM   r  r,  r   r   r7   r  )&r  r   r   r  r  r  r  ry  rq  r  r  Zweight_dtyper  r  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r  r+  Zreduction_axesr  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojrQ   r  Z	grad_biasr*   r9   r+   r-    s    	



 


r-  )r  r   r   r  r  r  r  ry  rq  r  r|  r}  r  rJ   c
                C   sd   t | |||||||||	
}|
||f}t|D ]2\}}|d k	r,t|| |j t||| dd q,|S r   )r-  r  r   r   r   )r  r   r   r  r  r  r  ry  rq  r  r|  r}  r  r   rQ   r  rC   r*   r*   r+   native_batch_norm_backward_out  s$    
r/  r   rb   r   r  r  r  save_varr)  c                 C   s"   t || |||||d|dddg
S NTrs   r-  r0  r*   r*   r+   miopen_batch_norm_backward	  s    r4  	r   rb   r   r  r  r  r1  r)  ZreserveSpacec	           	      C   s"   t || |||||d|dddg
S r2  r3  r5  r*   r*   r+   cudnn_batch_norm_backward-	  s    r6  )r   rI  c                    s  | j  | jttdkfdd | jdd  D ]}t|dkfdd q:d |d  dkrԈd |d  dkrtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	sV|sVtj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]B\}}|d kr|d|d d |f }n||d|d d |f  }q|||  S )Nr:  c                      s
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r*   r*   r  r*   r+   rp   R	  rq   z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r[  r=  r*   r?  r*   r+   rp   W	  rq   rL   c                 s   s   | ]\}}|| V  qd S r5   r*   )r1   r  r^  r*   r*   r+   r0  ]	  s     z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s$   | ]\}}}||d  |  V  qdS )r"   Nr*   )r1   r  r^  r  r*   r*   r+   r0  ^	  s    c                 S   s   t j| | |ddS )NtruncZrounding_moder^   divr  r  rC  r*   r*   r+   start_indexc	  s    z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr"   r7  r8  r9  r;  r*   r*   r+   	end_indexf	  s    z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkpD|| dk }|rV|d7 }n|dkrf|d8 }t j| t jd}|d| }|rt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nrm  r"   r   rL   r&  )r^   r(  r)  rN   Zscalar_tensorr   r'  minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxrd  maxvali1r  )r'  r=  r<  r*   r+   compute_idxi	  s,    
  
z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rL   ro   c                 S   sd   t |tr| |fS |dk st||dk}|dkr>t|d}t| |d} t|| }| |fS d S )Nr   rL   r   r   r   )r/   r   r   rN   rP   r^   r  )valsr  rB  rA  rI   rf  r*   r*   r+   
maybe_mask	  s    

z'adaptive_avg_pool2d.<locals>.maybe_mask)rA  rI   r   )r'  r   r  r^   rr   r>  rK  nnr  Z
avg_pool2drP   r   r   rM   )r   rI  r<  r  ZkernelrE  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wrF  rG  retr  jr*   )r'  r=  r  r   r<  r+   adaptive_avg_pool2dH	  s^    

(      
    
&
rK  )rc   r2   rI   r  r  rc   c                C   s   t | |||d|dS )NTinplacerc   
_index_addrL  r*   r*   r+   
index_add_	  s    	rQ  c                C   s   t | |||d|dS )NFrM  rO  rL  r*   r*   r+   	index_add	  s    
rR  )r2   rI   r  r  rN  rc   c                   s*  t | jtjdkfdd jdkr<dnd|jdkrT|ndtkfdd  dkrt | jttkpt 	t
  fdd |  }| jdk}|r| dn| }d f }|rtjntj}	|	|||dd	}
|r| S |r|
dS |
 S d S )
Nr"   c                      s   d j  dS Nz(Index should have dimension 1 or 0 (got r   r  r*   r  r*   r+   rp   	  rq   z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r*   r*   )rI   
index_sizer  r*   r+   rp   	  rq   c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r*   )rc   python_typer*   r+   rp   	  rq   r5   Tr_  )r?   canonicalize_dimsr  r^   rr   r   Zdtype_to_typer   r~  Zis_weakly_lesser_typer  rN   rs   
index_put_	index_putrM  r#  )r2   rI   r  r  rN  rc   zero_dimr   rd  rZ  r   r*   )rc   rI   r  rU  rW  r  r+   rP  	  s6    	

rP  r   c              
   C   s   t t| dkdd  t| }| d  }|dd  }tdd | D }|rX||f}n||f}|| }| d ||}dt| }	t|D ]V}
| |
 }t||	d||d f |}|rtj	||d|
d}qtj	||d|
d}q|S )	Nr   c                   S   s   dS )Nz#received an empty list of sequencesr*   r*   r*   r*   r+   rp   	  rq   zpad_sequence.<locals>.<lambda>r"   c                 s   s   | ]}| d V  qdS r.  r   r0   r*   r*   r+   r0  	  s     zpad_sequence.<locals>.<genexpr>)r   r   rI   r  )
r^   rr   r  r   r|   r   rM   rs   r  r  )	sequencesbatch_firstZpadding_valueZsequences_sizemax_sizeZtrailing_dimsmax_lenZout_dimsr   Zdim_paddingsr  Zcurrseqrowr*   r*   r+   pad_sequence	  s,    
  rc  r2   rI   r  r  c                 C   s   t | |||ddS )NTrN  _index_copyrd  r*   r*   r+   index_copy_
  s    rh  c                 C   s   t | |||ddS )NFre  rf  rd  r*   r*   r+   
index_copy
  s    ri  )r2   rI   r  r  rN  c          
         s   t | j|}t jdk fdd | jdk}|r@| dn| } jdkrX dn  d|  f }|rttjntj}||||}	|r| S |r|		dS |	
 S d S )Nr"   c                      s   d j  dS rS  r  r*   rT  r*   r+   rp   
  rq   z_index_copy.<locals>.<lambda>r   r5   )r?   rX  r  r^   rr   rN   rs   rY  rZ  rM  r#  )
r2   rI   r  r  rN  r[  r   rd  rZ  r   r*   rT  r+   rg  
  s    

rg  r   r   c                 C   sL   t | d| }t t |  }| jr6| d}n|}|t | |fS )Nr*   r  )r^   r>  r  r]   r   r  r   )rk   ry   r`   r   r*   r*   r+   log_sigmoid_forward.
  s    rj  r2   lowhighr   c                 C   s$   t j| jt|t|| j| j|dS )N)rl  rm  r   r'  r   )primsZ_uniform_helperr   r   r   r'  rk  r*   r*   r+   r   ;
  s    r   c                 C   s   |  t| |||S r5   )r   r   )rk   rl  rm  r   r*   r*   r+   uniform_M
  s    ro  c                 C   s   t | d }|d k	rDt|d kdd  tt ||kdd  |S |d k	rt|d kdd  tt ||kdd  g }t|D ]J\}}t||kr|| |d  t|  q|t| |d  |  q|S tddd  d S )	Nr#   c                   S   s   dS Nz9Must specify exactly one of output_size and scale_factorsr*   r*   r*   r*   r+   rp   X
  rq   z.upsample_compute_output_size.<locals>.<lambda>c                   S   s   dS N r*   r*   r*   r*   r+   rp   Z
  rq   c                   S   s   dS rp  r*   r*   r*   r*   r+   rp   `
  rq   c                   S   s   dS rq  r*   r*   r*   r*   r+   rp   b
  rq   Fc                   S   s   dS rp  r*   r*   r*   r*   r+   rp   k
  rq   )r  r^   rr   r  r  r  r   )rk  rI  scale_factorsZspatial_dimensionsr  r  r*   r*   r+   upsample_compute_output_sizeS
  s0     rt  c                 C   s   | d krd S | | S r5   r*   )scalesrd  r*   r*   r+   get_scale_valueo
  s    rv  )r   rI  rs  rJ   c                 C   s2   t |  ||}|r|nd gt| }t| ||S r5   rt  r   r  _upsample_nearestr   rI  rs  osizeru  r*   r*   r+   _upsample_nearest_vecu
  s    r{  c                 C   s6   t |  ||}|r|nd gt| }t| ||ddS NTexactrw  ry  r*   r*   r+   _upsample_nearest_exact_vec
  s    r  c                 C   s   g }t |}|rdnd}t|D ]}|| }| j| |  }	|| d k	rX|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}q|
| q |S )Nr   r   r&  r"   rL   )r  rM   r   r^   r(  r   r'  r7   r)  rN   r  )r   rI  ru  r~  r  Znum_spatial_dimsr  r<  rz  isizerd   Zoutput_indicesZinput_indicesrO   r*   r*   r+   !_compute_upsample_nearest_indices
  s    $r  )Zpreserve_memory_formatr   )r   rI  ru  rJ   c                 C   s   t | ||gS r5   rx  r   rI  ru  r*   r*   r+   upsample_nearest1d
  s    	r  c                 C   s   t | ||gddS r|  r  r  r*   r*   r+   upsample_nearest_exact1d
  s    r  )r   rI  scales_hscales_wrJ   c                 C   s   t | |||gS r5   r  r   rI  r  r  r*   r*   r+   upsample_nearest2d
  s    
r  c                 C   s   t | |||gddS r|  r  r  r*   r*   r+   _upsample_nearest_exact2d
  s    r  )r   rI  scales_dr  r  rJ   c                 C   s   t | ||||gS r5   r  r   rI  r  r  r  r*   r*   r+   upsample_nearest3d
  s    r  c                 C   s   t | ||||gddS r|  r  r  r*   r*   r+   _upsample_nearest_exact3d  s       r  )r   rI  ru  r~  rJ   c           	      C   sp   t | |||d}d d g| }t| |}|jdkrlt| }| jd }| jjdkr`|dk r`t	j
}|j|d}|S )Nr}  r   r"   cudar   )r  rs   _unsafe_indexr  r?   r   r   r'  r  r^   r   r#  )	r   rI  ru  r~  Zspatial_indicesr  r   r   
n_channelsr*   r*   r+   rx    s       


rx  c                    sb   |r|rd n|rd n|r"d nd t   dksBtt  fddtdt  D S )NrJ  r   rw   r#   r   c                    s    g | ]}t ||   qS r*   r=  r  Z
group_sizeparamsr*   r+   r3   ;  s    z!gather_params.<locals>.<listcomp>)r  r   rM   )r  
has_biaseshas_projectionsr*   r  r+   gather_params0  s    r  c                 C   sh   |rB| d|  |d|   }}| d| d  |d| d   }}n| | ||  }}d\}}||||fS )Nr#   r"   NNr*   )r  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr*   r*   r+   params_hiddens@  s    $r  c                 C   s2   ||kst || d|||  | dd|S rh   )r   r  r   )r  last_batch_size
batch_sizer  r*   r*   r+   update_hidden_for_packedK  s    r  c              	   C   s4   ||kr| S ||k st t| |d||| fS rh   )r   r^   concatr   )r  r  r  Z
inp_hiddenr*   r*   r+    update_hidden_for_packed_reverseQ  s    r  c                 C   s&  |d }|d }|r|d nd }	|r,|d nd }
g }g }|rD|d n|d }| dd|}t| t|}|r||d d d }|D ]Z} | jd }||krn"|rt||||}nt||||}|| |||	||
}|}|| q|r|  n|| |  t	|d}|st	|dn|}||fS )Nr   r"   r#   rw   rL   )
r   r^   r  r  r   r  r  r  reverser   )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr  r   
hidden_outr*   r*   r+   one_layer_rnn_data_  sL    
      

r  c                    s    fdd}|S )Nc                    s    t ||||  S r5   r   linearr  r  r  r  r  r  nonlinearityr*   r+   rE     s    zrnn_cell.<locals>.innerr*   r  rE   r*   r  r+   rnn_cell  s    r  c                    s    fdd}|S )Nc                    s$   t | ||}  t ||||  S r5   r  r  r  r*   r+   rE     s    zrnn_cell_data.<locals>.innerr*   r  r*   r  r+   rnn_cell_data  s    r  c                 C   s   |d }|d }|r|d nd }|r,|d nd }	t | ||}
|rL|
dn|
}
|d}g }|
D ] }|||||||	}|| qb|r|  t|d}||dfS )Nr   r"   r#   rw   )	r   r  fliprN   r  r  r^   r   rM  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  r  r   r*   r*   r+   one_layer_rnn  s    
r  c                 C   s   |d }|d }|r&|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d   }}}||	d|	dffS )Nr   r"   r#   rw   F)
r^   Zzerosr   rN   r#  r  rs   Zmkldnn_rnn_layerr  rM  )r  r  r  r  r  w0w1w2w3hxcxr  modeZhidden_size
num_layersr  r_  ry  outputsrT   hycyr*   r*   r+   mkldnn_one_layer_lstm  sN    


r  c
                 C   s   |r|  ddn| } g }
t|D ]}t||||\}}}}|rN||d k rN|nd}|	| |||\}}|
| |r|	| |||dd\}}|
| |rt||g| d } n|} |dkr |r ||d k r tj| |dd} q |r|  ddn| } | |
fS )Nr   r"   r   T)r  )ry  )	transposerM   r  r  r^   r   rI   r{  )r   r  r  r  r  r{  ry  r  r_  layer_fnfinal_hiddensr  r  r  r  r  Zfwd_inpZ
fwd_hiddenZbwd_inpZ
bwd_hiddenr*   r*   r+   _rnn_helper  s8       
    

r  c	                 C   sR   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  r^   r   stackr   r  r  r  r  r{  ry  r  r_  r  r   r  r*   r*   r+   rnn_tanh_input  s    
r  c	                 C   sR   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS r  )	r  r  r  r   r  r  r^   r  r  r  r*   r*   r+   rnn_relu_input,  s    
r  c	                 C   sT   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  r^   r  r  datar  r  r  r  r  r{  ry  r  r  r   r  r*   r*   r+   rnn_relu_dataK  s&    
r  c	                 C   sT   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS r  )	r  r  r  r   r  r  r^   r   r  r  r*   r*   r+   rnn_tanh_datan  s&    
r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d krv|nt ||d }||fS )Nr   r   r"   r#   rw   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimZgatesZchunked_gatesZin_gateZforget_gateZ	cell_gateZout_gater  r  r*   r*   r+   	lstm_cell  s    r  c              
   C   s   |d }|d }|r|d nd }|r,|d nd }t |dkrD|d nt |dkrX|d nd }	|d d}
|d d}t| ||}|r|dn|}g }|D ](} t| |
||||	dd\}
}||
 q|r|  t	|d}||

d|
dffS )Nr   r"   r#   rw   rJ  r   r  )r  rN   r   r  r  r  r  r  r^   r   rM  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r*   r*   r+   one_layer_lstm  s$    *r  c              
   C   s  |d }|d }|r|d nd }|r,|d nd }	t |dkrD|d nt |dkrX|d nd }
g }g }|rp|d n|d }t| t|}|r|d d d }|d }|d }|dd||dd| }}|D ]} | jd }t| ||} ||k r:||d||| |d||| f |dd||dd| }}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| q|r|  ||f}n:|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r"   r#   rw   rJ  r   rL   r  )r  r^   r  r  r   r   r   r  r  r  r  r  rK  r   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zorig_hxZorig_cxr  r  r  r  Zhidden0Zhidden1r   r*   r*   r+   one_layer_lstm_data  sd    *  


  
r  c                 C   s    dd }|| ||rt S tS dS )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t j sdS | gt| tt| }dd |D }t|dkrHdS | }|t dkrbdS dd |D }|D ]}|t j	t j
fkrt dS qt| jrdS |d d|d dk}|rdS d	S )
NFc                 S   s   h | ]
}|j qS r*   r  r1   tr*   r*   r+   	<setcomp>  s     zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r"   r  c                 S   s   h | ]
}|j qS r*   r   r  r*   r*   r+   r    s     r   r#   T)r^   r  Z_get_mkldnn_enabledr  r   from_iterabler  popr'  rt  Zbfloat16requires_gradr   )	r   r  r  r  Zdevicesr'  Zdtypesr   r  r*   r*   r+   
use_mkldnn  s&    
z2select_one_layer_lstm_function.<locals>.use_mkldnnN)r  r  )r   r  r  r  r*   r*   r+   select_one_layer_lstm_function  s    r  c	                 C   s   t |dkstdt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t	|d dt	|d dfS )Nr#   lstm expects two hidden statesr   r"   )
r  r   r  r   r  rK  r  r  r^   r  )r   r  r  r  r  r{  ry  r  r_  r  r  r   r  r*   r*   r+   	lstm_impl+  s$    $r  c	                 C   s   t |dkstdt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	
|d dt	
|d dfS )Nr#   r  r   r"   F)r  )r  r   r  r   r  rK  r  r   r  r^   r  r  r*   r*   r+   lstm_data_implM  s"    $
r  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrw   r"   r#   r   )r  r   r  r   r   r  r  r  r  r  r  Zchunked_igatesZchunked_hgatesZ
reset_gateZ
input_gateZnew_gater*   r*   r+   gru_celln  s    r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrw   r"   r   r#   r  r  r*   r*   r+   gru_cell_dataw  s    r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r^   r  )r  r  r  r  r  r  r{  ry  r  r   r  r*   r*   r+   gru_impl_data  s    r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r^   r  )r   r  r  r  r  r{  ry  r  r_  r   r  r*   r*   r+   gru_impl  s    
r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S Nr   r"   )rt  r   rv  r^   r  rs   _upsample_bilinear2d_aar   rI  align_cornersrs  rz  scale_hscale_wr*   r*   r+   upsample_bilinear2d_aa_vec  s    

    r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S r  )rt  r   rv  r^   r  rs   _upsample_bicubic2d_aar  r*   r*   r+   upsample_bicubic2d_aa_vec  s    

    r  c                 C   s4   t |  ||}|r|nd gt| }t| |||S r5   )rt  r   r  _upsample_linear)r   rI  r  rs  rz  ru  r*   r*   r+   _upsample_linear_vec  s    	r  )r   rI  r  r  rJ   c                 C   s   t | |||gS r5   r  )r   rI  r  r  r*   r*   r+   upsample_linear1d  s    r  )r   rI  r  r  r  rJ   c                 C   s   t | ||||gS r5   r  )r   rI  r  r  r  r*   r*   r+   upsample_bilinear2d  s    r  )r   rI  r  r  r  r  rJ   c                 C   s   t | |||||gS r5   r  )r   rI  r  r  r  r  r*   r*   r+   upsample_trilinear3d  s       r  c                 C   sD   |r |dkr| d |d  S dS |d k	r8|dkr8d| S | | S d S )Nr"   r\   r   r*   )r?  r@  r  rd   r*   r*   r+   _compute_scale  s    r   c                 C   s    |r| | S | |d  d S d S Nr   r*   )rd   Z	dst_indexr  r*   r*   r+   _compute_source_index  s    r  )srcweightsweights_precisionrJ   c                 C   sB   t dd t| |D d|d >  }||? }t|ddtjS )Nc                 s   s*   | ]"\}}| tj| tj V  qd S r5   )r7   r^   rn  )r1   r  rC  r*   r*   r+   r0    s    z%_sum_tensors_uint8.<locals>.<genexpr>r"   r      )_sum_tensorsrK  r^   r~   r7   r  )r  r  r  r   r*   r*   r+   _sum_tensors_uint8  s    
r  )r  rJ   c                 C   sJ   t |  }d}t j||jd}d|d|d >   }|dk}||  S )N   r  r   r"   i   )r^   r  r|   r(  r'  r   )r  Z
max_weightZmax_weight_precisionZ
precisionsvaluesrf  r*   r*   r+   _compute_weight_precision%  s    r  )r   rI  r  ru  rJ   c                    s  j d d \}}j dd  }t|tjtjjd\}fddfddtt|||D }tt| \}	g }
t	ddgg  D ]F d d g fd	dt
D  }t|}t|}|
| qtt
D ]N}|	| |  d
dfddt|
d d d |
dd d D }
qt|
dksLt|
d }t}jjdkr||dk r|tj}t|tjst|j|d} s| }|S )Nr#   r  c           	         s   t | | |}tj|jdjd}t|| jdd}|j|jd fdg|  }|tj	}|d j| d d}|||fS )Nr  r   r   rx   r   r"   r{   )
r   r^   r(  r'  r7   r  r~   r   r   r)  )	inp_sizer@  ru  ZnsqueezeZscale_factorr  Zx_f32r2   Zxp1)r  r   r   r*   r+   
get_values?  s    z$_upsample_linear.<locals>.get_valuesc                    s,   g | ]$\}\}}} |||d  | qS r   r*   )r1   r  r  r@  ru  )r  n_dimsr*   r+   r3   L  s   z$_upsample_linear.<locals>.<listcomp>r   r"   c                    s(   g | ] } | d kr| n| qS r  r*   )r1   k)r  xp1sxsr*   r+   r3   V  s     r   r\   c                    s$   g | ]\}}|t ||   qS r*   )r^   r   )r1   Zv1Zv2)xscaler*   r+   r3   ]  s   r     r   )r   r  r?   r@   r  INT_TO_FLOATr  rK  r  r   rM   rs   r  r   r  reversedr~   r7   r   r   r'  r  r^   r   r/   r   r#  r  round)r   rI  r  ru  Zn_batchr  Z	inp_sizesrO   r
  Zxs_f32vsrd  vr  r   r   r*   )	r  r  r   r  r   r  r  r  r  r+   r  .  sF    

"



r  )r  r  rJ   c                 C   s   | j |j kS r5   r?  )r  r  r*   r*   r+   is_same_sizex  s    r  c                 G   s   t | |S r5   )rs   r  )r2   r   rA   r*   r*   r+   _reshape_alias}  s    r  c                 C   s   t | |S r5   )rs   r  )r2   r  r*   r*   r+   _index  s    r  )rk   r   r   r   r   rJ   c                 C   sV  |   }d}|dk rd}|d k	rX|dkrLdg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
kr|dkr| dd}||fS |d k	r
|| j}t|||
|}t||k|d}| }n||k | }|tjj
kr4| }n|tjj
krN| | }||fS )Nr"   r#   r   r*   r   )rI   r   r  r^   r_   rN   gatherrM  r!   r'   rl   r   expandr   r7   r)   r(   )rk   r   r   r   r   r  r   r   wr   Zsafe_target_r   r   Zwsumr*   r*   r+   _nll_loss_forward  s@    




r  r   c                 C   s   |   dkr|   dks td|  dks4td|   dkoJ|  dk}|s~| jd |jd ks~td| j d|j d| jd	 }|d ks|  dkr| |kstd
| d|j t| ||||S )Nr   r#   r   r"   r   r   r   r   rL   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rI   r   r   r   r  )rk   r   r   r   r   r   Z	n_classesr*   r*   r+   nll_loss_forward  s(    	 



r   c                 C   s   t | ||||S r5   )r  )rk   r   r   r   r   r*   r*   r+   nll_loss2d_forward  s    	r!  )r2   ArJ   c                 C   s    |d |  |d  |  |  d S )Nr#   rw   r"   r*   r2   r"  r*   r*   r+   _upsample_cubic_convolution1  s    r$  c                 C   s(   ||  d|  |  d|  |  d|  S )NrJ     r   r*   r#  r*   r*   r+   _upsample_cubic_convolution2  s    r&  )r  rJ   c           
      C   s   d}| j t dkrtj| d|  gdd}tj| d d|  gdd}t||}t||}tj|dd\}}tj|dd\}}	|||	|fS t| d |t| |td|  |td|  |fS d S )Ng      r  r\   r   ro   r   )r'  r^   r  r&  r$  r  )
r  r"  Ztt1Ztt2Zw03Zw12r  r  r  r  r*   r*   r+    _upsample_get_cubic_coefficients  s    

r'  )coeffstsrJ   c                 C   s    t |}tdd t| |D S )Nc                 s   s   | ]\}}|| V  qd S r5   r*   r1   r  r  r*   r*   r+   r0    s     z+_upsample_cubic_interp1d.<locals>.<genexpr>)r'  r  rK  )r(  r)  Zcoeffs2r*   r*   r+   _upsample_cubic_interp1d  s    r+  )r)  rJ   c                 C   s   t tj| S r5   )r   r^   add)r)  r*   r*   r+   r  	  s    r  )	num_stepsr  r   r'  c                 C   sB   | dkrt jd||dS |s(| d |  nd}t j| || ||dS )Nr"   r   rm  )Zstepsr'  r   )r^   r  Zlinspace)r-  r  r   r'  r  r*   r*   r+   _linspace_from_neg_one  s    r.  )thetahr  r  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr"   )r"   r"   r"   r&  )r   r#   constantr   rA  r  rl   )r"   r"   )r#   r   	r   r'  r.  r  r^   r  rH  r  rA  )	r/  r0  r  r  r   r'  grid_xgrid_ygrid_oner*   r*   r+   _make_base_grid_4d  s    r7  )r/  r<  r0  r  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr"   )r"   r"   r"   r"   r&  )r   rw   r1  r   r2  rX  r#   r"   )rw   r   r3  )r/  r<  r0  r  r  r   r'  r4  r5  Zgrid_zr6  r*   r*   r+   _make_base_grid_5d(  s    r9  r/  r   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr  rL   rw   r"   r   r#   )r7  r  r   rN   r   )	r/  r   r  r  rO   r0  r  	base_gridgridr*   r*   r+   _affine_grid_generator_4d9  s     r>  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr;  rL   r   r"   r   rw   )r9  r  r   rN   r   )
r/  r   r  r  rO   r<  r0  r  r<  r=  r*   r*   r+   _affine_grid_generator_5dC  s     r?  c                 C   sD   t t|dkdd  t|dkr2t| ||dS t| ||dS d S )N)r   rJ  c                   S   s   dS )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r*   r*   r*   r*   r+   rp   S  rq   z'affine_grid_generator.<locals>.<lambda>r   r;  )r^   rr   r  r>  r?  r:  r*   r*   r+   affine_grid_generatorM  s    
r@  )r  r=  interpolation_modepadding_moder  _expand_gridrJ   c                    s(  t dkfdd t dkfdd tttdfddttttdd	d
tttdfddtttdfdd}j\ |j\}}|dkstr|d| d}tttdfddt jj	ddddt j j	dd ddtttt
d fddtttdfdd
|d }	|d }
dkrB||	}||
}| |  d  }}d  }}|| }}|| ||  }|| ||  }|| ||  }| |  }t
fdd|f|||f|||f|||ffD S dkr|||	}||
}| }| }
||dS |	}|
}| | | | }sʈd|d}tttd
fd d!ttd"fd#d$	t	fd%dtd&D }t||S d S )'N)r   r"   r#   c                      s
   d  S )NzInvalid interpolation mode r*   r*   )rA  r*   r+   rp   l  rq   z"_grid_sampler_2d.<locals>.<lambda>c                      s
   d  S )NzInvalid padding mode r*   r*   )rB  r*   r+   rp   o  rq   )coordsr   rJ   c                    s0    r|d d n|d }|d d }| | | S r  r*   )rD  r   r   ofsr;  r*   r+   unnormalizer  s    z%_grid_sampler_2d.<locals>.unnormalize)rD  	twice_low
twice_highrJ   c                 S   sv   ||krt | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr#   r   r"   r   )r^   r   r   fmodfloorr7   Zint8r_   )rD  rG  rH  Z
coords_minZcoords_spanZcoords2extraZflipsr*   r*   r+   reflect_coordinates}  s    

  
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sj   dkr| S dkr&t | d|d S  r@| dd|d  }n| dd| d }t |d|d S d S )Nr   r"   r#   rL   r}   )rD  r   Zcoords_reflected)r  rB  rL  r*   r+   compute_coordinates  s    z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r5   r*   )rD  r   Z	coords_un)rM  rF  r*   r+   compute_source_index  s    
z._grid_sampler_2d.<locals>.compute_source_indexr#   r"   )r  ysrJ   c                    s,   t d| kt | k t d|k| k S rh   )r^   rs  )r  rO  )iHiWr*   r+   in_bounds_cond  s     z(_grid_sampler_2d.<locals>.in_bounds_condr  )r  rO  wsrJ   c                    sN   | |rnd t  fdd| jtjd|jtjd|fD S )Nr"   c                 3   s(   | ] }t |d  V  qdS r.  )r^   r_   r  r  )r  rC  r7  oHoWr*   r+   r0    s   z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r>  r7   r^   r)  )r  rO  rS  )r  r  rC  rR  rT  rU  )rC  r7  r+   clip  s
    
z_grid_sampler_2d.<locals>.clip)ixiyrJ   c                    s&   | ||\}}} ||f | S r5   r*   )rW  rX  r  Zidx_xZidx_yZw_)C_idxN_idxr  rV  r*   r+   get_summand  s    z%_grid_sampler_2d.<locals>.get_summand).r   ).r"   r   c                 3   s    | ]\}}} |||V  qd S r5   r*   )r1   rW  rX  r  )r[  r*   r+   r0    s   z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rU   r*   )rW  rX  r2   rT   )rM  r[  rP  rQ  r*   r+   get_value_bounded  s    

z+_grid_sampler_2d.<locals>.get_value_bounded)rE  rJ   c                    sF   | d  } d | | d | d |f}t |S )Nr"   r#   )r+  )rE  Ziy_ofscs)r\  ix_nwiy_nwtxr*   r+   	get_coeff  s    z#_grid_sampler_2d.<locals>.get_coeffc                 3   s   | ]} |V  qd S r5   r*   )r1   rE  )ra  r*   r+   r0    s     r   )r^   rr   r   r  r   r   r  r  r(  r'  r   rJ  r  r  rN   r>  rM   r+  )r  r=  rA  rB  r  rC  rN  rO   twor2   rT   rW  rX  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyr(  r*   )r  rY  r  rZ  rC  r  r  rV  rM  ra  r[  r\  rP  rQ  rR  rA  r^  r_  rT  rU  rB  rL  r`  rF  r+   _grid_sampler_2d[  sz    
 
 "



	







rd  )r  r=  rA  rB  r  rJ   c                 C   s   t | ||||dS )N)r=  rA  rB  r  )rd  )r  r=  rA  rB  r  r*   r*   r+   grid_sampler_2d  s    
re  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr#   r"   c                      s   d    d   S )Nzmatrix @ vector expected, got r  ro   r*   rk   r  r*   r+   rp     rq   zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r2   r"   z), vec (r   r\  r*   rf  r*   r+   rp     rq   ro   )r^   rr   rI   r   r   rf  r*   rf  r+   r    s    r  c                 C   sd   |d k	r4|d | d }d| |  |t |   }nd| |  t |  }|d k	rZ|| }t||S rU   )r   Z
logsigmoidr   )rk   r   r   Z
pos_weightr   Z
log_weightr   r*   r*   r+    binary_cross_entropy_with_logits#  s    rg  )tensor1tensor2is_outrJ   c                 C   s   | j |j kr| |fn|| f\}}ddlm} |j dkr@|j dksDdS |jrR|sRdS | j dkr`dS || dkrtdS |j}| }tdd t|d d	 |d
d |d
d D S )Nr   )guard_size_obliviousrw   r#   FTc                 s   s    | ]\}}}||| kV  qd S r5   r*   )r1   Zst1Zst2s2r*   r*   r+   r0  F  s   zshould_fold.<locals>.<genexpr>r   r"   rL   )	r  r  rk  r  r   r   r  r2  rK  )rh  ri  rj  t1t2rk  Zt1_shapeZ	t1_strider*   r*   r+   should_fold4  s     

"ro  )Zpass_is_out)rj  c                C   s  |   }|  }|dkr |dks$t|dkr@|dkr@t| |S |dkr\|dkr\t| |S |dkr|dkrttt| d|dS |dkr|dkrt| |S t| ||rv||k}|r|j	n| }|s|n|dkr| 
 n| }|j}t|d d }	ttj|	}
|  dk}|r(|	|jd  ||
|d }|rb|||	}|r^|j	 S |S |||	S n|dkrn|dkrn|dkr| dnd}| d}| jd d }|dkr|dn|d}|dkr|dnd}g }t|d D ]}||| q |dkr|dkr|d |d kr|d dkrd| jrdt| d|S |d dkr|jrt| |dS tt||}|||g }t|}| ||||}|dk}|r||g }||||d}n |||g }|||||}|}	|dkr,|	| |dkr@|	| |r\||d|	S |||	S ntddd	  d S )
Nr   r"   r#   rL   r   rw   Fc                   S   s   dS )Nz/both arguments to matmul need to be at least 1Dr*   r*   r*   r*   r+   rp     rq   zmatmul.<locals>.<lambda>)rI   r   r^   dotr  rM  r  rN   ro  r   r  r   r  r   operatorr   r  r   r  r#  r   rM   r  r   Zbroadcast_shapesra  r  bmmrr   )rh  ri  rj  Zdim_tensor1Zdim_tensor2r  rm  rn  Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr   r  m1Zbatch_tensor1m2r   Zbatch_tensor2r  Zexpand_batch_portionZtensor1_expand_sizeZexpand_batch_productZtensor1_expandedZ
vector_rhsZtensor2_expand_sizeZtensor2_expandedr*   r*   r+   r   L  s    	


  

 
  



r   )r   rI  r  r  r  rJ   c                    s
  j \}}t|d ||}t|d ||}tjtjjd\}}tj|d jdj	|d}	tj|d jdj	|d}
t
||
|}t
||	|}|d}| }| }|| dd}|| dd}|	tj}|	tj}|d ||d |d	 f}|d ||d |d	 ft|t|}d
\jtjkrttt|fddD fdd|D }fddfdd t fdd|D }jtjkr؈d k	stt||}ntdd t||D }t}|j|d}|S )Nr   r"   r  r  r   rL   r   r\   r#   r  c                    s.   g | ]&}|d  >  t |d  t jqS r"   r   r^   r   r7   Zint16r1   r  )weights_precision_xr*   r+   r3     s   z.upsample_bicubic2d_default.<locals>.<listcomp>c                    s.   g | ]&}|d  >  t |d  t jqS ru  rv  rw  )weights_precision_yr*   r+   r3     s   c                    s<   t | d d }t |dd }td d ||g}|S r  )r^   r~   rs   r  )rO  r  Zy_idxZx_idxr  )in_hin_wr   r*   r+   load_bounded  s    z0upsample_bicubic2d_default.<locals>.load_boundedc                    sT   t  fddD }jtjkr<d k	s0tt|S tdd t|D S )Nc                 3   s   | ]} |V  qd S r5   r*   )r1   Zx_ofs)r|  rT   r*   r+   r0  
  s     zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>c                 s   s   | ]\}}|| V  qd S r5   r*   r*  r*   r*   r+   r0    s     )r>  r   r^   r  r   r  r  rK  )rT   Zsrc_x)r   ixs_ofsr|  rx  	weights_x)rT   r+   get_x_interp	  s
    z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   s   | ]} |V  qd S r5   r*   )r1   Zy_ofs)r  r*   r+   r0    s     z-upsample_bicubic2d_default.<locals>.<genexpr>c                 s   s   | ]\}}|| V  qd S r5   r*   r*  r*   r*   r+   r0    s     r   )r   r   r?   r@   r  r  r^   r(  r'  r7   r  rN   rJ  r~   r)  r'  r   r  r  r>  r   r  r  rK  r   r#  )r   rI  r  r  r  rO   Zh_scale_factorZw_scale_factorr   r  rJ  Zx_floatZy_floatr2   rT   Zyscaler  Ziys_ofsZ	weights_yZsrc_yr   r   r*   )	r  rz  r{  r   r}  r|  rx  ry  r~  r+   upsample_bicubic2d_default  sT     




r  )r  rI  r  rs  rJ   c                 C   s   t t|t| dkdd  |d krd|d k	s4tttttf tdd t| j	dd  |D }|rl|nd\}}t
| ||||S )Nr"   c                   S   s   dS )Nz:Must specify exactly one of output_size and scale_factors.r*   r*   r*   r*   r+   rp   *  rq   z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s"   | ]\}}t t|| V  qd S r5   )r   r   )r1   r  rd   r*   r*   r+   r0  0  s   z)upsample_bicubic2d_vec.<locals>.<genexpr>r#   r  )r^   rr   r~  r   r	   r   r  r>  rK  r   r  )r  rI  r  rs  r  r  r*   r*   r+   upsample_bicubic2d_vec  s    
r  )r  r-  rJ   c                    s    fdd}t  ||S )Nc                    s4   t j|  ||  jd}|d |d |    S )Nr  r"   )r^   r(  r'  r   r   middler   Zdim_idxr  r*   r+   rd  ?  s    z_reflection_pad.<locals>.idx_reflection_or_replication_padr  r-  rd  r*   r  r+   _reflection_pad9  s    r  c                    s    fdd}t  ||S )Nc                    s*   t j|  ||  jd}t |d|d S )Nr  r   r"   )r^   r(  r'  r~   r  r  r*   r+   rd  P  s    z_replication_pad.<locals>.idxr  r  r*   r  r+   _replication_padJ  s    r  )r  r-  idx_fnrJ   c                    s   t d  t|   d  d fk fdd | j  d  }|    } fddt D } fddt D }| }t D ]>}d g|  }	||| || || |	|| < t||	}qt	|}
|j
|
d}|S )	Nr#   r"   c                      s    d  d d  d d  dS )NZreflection_padzd requires r"   zD or r#   zD inputr*   r*   ro   r*   r+   rp   c  rq   z0_reflection_or_replication_pad.<locals>.<lambda>c                    s    g | ]}d  d |   qS r8  r*   r  rI   r-  r*   r+   r3   h  s     z2_reflection_or_replication_pad.<locals>.<listcomp>c                    s$   g | ]}d  d |  d  qS r8  r*   r  r  r*   r+   r3   i  s     r   )r  r^   rr   rI   r   rM   rs   r  r?   r   r#  )r  r-  r  Z	inp_shapeZnc_dimpadding_leftpadding_rightr   r  rd  r   r*   r  r+   r  [  s"    
 
r  ry   r|   r!  c                C   s(   t j| ||d}t j| ||d}||fS )Nr!  )r^   aminr  )rk   rI   r"  r  r  r*   r*   r+   aminmaxw  s    r  c                C   s"   t jtt| d| |||dS )Nr   r   )rs   r   r^   r_   isnan)rk   rI   r"  r   r*   r*   r+   nansum  s    r  r   r  r'  r   r  r   r  r'  r   c             	   C   s   t jjd| d||||dS )Nr   r"   r  rs   r(  Z
start_stepr  r*   r*   r+   arange_default  s    
      r  r  r  r   r  r'  r   c             	   C   s   t jj| |d||||dS )Nr"   r  r  r  r*   r*   r+   arange_start  s    
      r  c                  O   s   ddl m} || |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rA   rB   r  r*   r*   r+   out_dtype_decomp  s    r  )r   r   r   marginr   r   rJ   c           	         s  t t jd jd  t |dkp:|dkdd  t jdkoX dkfdd t jdko~ kfdd d k	rt t jdko  k fdd dt jdd	}||  }|	d}|dkr|n|| }d k	r$|  }t j
 jd
}t |k|d}|tjjkr\| S |tjjkr|| |jd  S |jddS d S )Nr   r"   r#   c                   S   s   dS )Nz only p == 1 and p == 2 supportedr*   r*   r*   r*   r+   rp     rq   z#multi_margin_loss.<locals>.<lambda>c                      s   d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r?  r*   )r   r*   r+   rp     rq   c                      s   d  dj  S )Nz#inconsistent target size, expected rZ  r?  r*   )nframer   r*   r+   rp     rq   c                      s   d  dj  S )Nz#inconsistent weight size, expected rZ  r?  r*   )rI   r   r*   r+   rp     rq   r]  r  ro   )r^   
atleast_2dZ
atleast_1dr   rr   r  r   rN   r  r  r(  r'  r_   r!   r(   rl   r   r)   r   )	r   r   r   r  r   r   ur`   rd  r*   )rI   r   r  r   r   r+   multi_margin_loss  sB    








r  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko@|dk fdd ttdkod k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkrJ|jdd }n$|tjjkrb| }n|jdd}|| j}||fS )Nr"   r#   r   c                      s
   d  S r  r*   r*   )orig_input_shaper*   r+   rp     rq   z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r*   r*   r  Zorig_target_shaper*   r+   rp     rq   r  rL   Tr!  r]  ro   r\   )r   rL   )r   r^   r  rr   r  r(  r'  r  r_   r  anyrN   Tr  r!   r(   rl   r   r   r)   r7   r   r   )r   r   r   rI   rd  Zis_endZend_idxZtarget_maskZtidx0r  Ztidx1r  r`   r*   r  r+   multilabel_margin_loss_forward  s@    





r  )	attn_maskrd   )querykeyrl   	dropout_p	is_causalr  rd   rJ   c          
   
      s   j }ttfdd t dkoF dkoF dkfdd t dk fdd tjd jd kojd jd kdd  tjj| |d |d	\}}	|	d
dj
tjd}|	d
d|	fS )Nc                      s   d j  S )Nz-query must be FP32, FP64, BF16, FP16 but got r   r*   )r  r*   r+   rp   &  rq   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>r   c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got r  ro   r*   )r  r  rl   r*   r+   rp   *  rq   r   c                      s
   d  S )Nz&dropout probability must be zero, got r*   r*   )r  r*   r+   rp   -  rq   rw   c                   S   s   dS )Nz&q, k, v should have the same head sizer*   r*   r*   r*   r+   rp   1  rq   )r  r  r  Zdropout_maskrd   r"   r#   r   )r   r^   rr   r  rI   r   rs   Z"_scaled_dot_product_attention_mathr  r  r#  r   )
r  r  rl   r  r  r  rd   r   r   Zattnr*   )r  r  r  rl   r+   *scaled_dot_product_flash_attention_for_cpu  s:    
" 
&
+r  c                    s   t |  fdd}|S )Nc                     s    | |}| d  |S rh   )r   )rA   rB   r   outplace_opr*   r+   
inplace_opd  s    
z$register_inplace.<locals>.inplace_opr   )Zaten_opr  r  r*   r  r+   register_inplacec  s    r  c                 C   sx   |   s |  s t|}t|}t||}t|tjr@|dkrH|| }|dkrT|S t|tjrh|dkrp| | } | | S )Nr"   r   )r  r  r  r^   rr  r/   numbersNumber)rk   Zbatch1Zbatch2rY   rc   r   r*   r*   r+   baddbmml  s    r  c                 C   s   t j| |ddS )NrJ  r8  r9  )rk   r   r*   r*   r+   floor_divide}  s    r  c                 C   s   t tj| jdS rU   )rF   r   rq  r   r   )r  r*   r*   r+   	sym_numel  s    r  r   r   )rk   r   r   rJ   c                C   s2   |d krt jj| g |dS t jj| g ||dS d S )Nr   r  )rs   r   Zdim_IntListZIntList_out)rk   r   r   r*   r*   r+   sum_default  s    r  rk   rI   c                 C   s6   |d kr"t j| tt|  S t j| |gS d S r5   )rs   rM  Zdimsr  rM   rI   r  r*   r*   r+   squeeze_default  s    r  c                    s@   t  fddtt| jD }| jd|dd}| ||  |fS )Nc                 3   s   | ]}| kr|V  qd S r5   r*   r  ro   r*   r+   r0    s      z)_weight_norm_interface.<locals>.<genexpr>r#   Tr  )r>  rM   r  r   r   )r2   rT   rI   Zkeep_dimr   r*   ro   r+   _weight_norm_interface  s     r  assume_uniqueinvertc                C   st   t | tjstj| |jd} t |tjs8tj|| jd}| dt|  d k r`t| ||dS t| |||dS d S )Nr  g      $@g(\?r  r  )	r/   r^   r   r  r'  r   r   isin_defaultisin_sorting)elementstest_elementsr  r  r*   r*   r+   isin  s       r  r  c                C   sl   |   dkrtj| tjdS | j| jd|j  }|s@||k}n||k}ttd|j d d}|j	|dS )Nr   r   r   rL   r"   ro   )
r   r^   
empty_liker~  r  r   r  r>  rM   r  )r  r  r  r2   cmprI   r*   r*   r+   r    s    
r  c                C   s   |   }|  }|rt||g}tj|dd\}}|dd  |d d k}	t|	ddgd}	|rj|	 }	t|	}
|
d||	}
|
d|   S t|\}}t	||}t
|| k |d}|| |k}|r| n|}|| jS d S )NT)Zstabler"   rL   r   F)ro  r^   r   sortr  Zlogical_notr  ri  r   Zsearchsortedr_   r   r   )r  r  r  r  Zelements_flatZtest_elements_flatZall_elementsZsorted_elementsZsorted_orderZduplicate_maskrf  Zsorted_test_elementsrO   rd  Ztest_idxr  r*   r*   r+   r    s$    
r  c                 C   s   |  d}|| S rK   )r   )rk   r  Z	flattenedr*   r*   r+   take  s    
r  c                 C   s2   |d krt j}|t jkr t|}tj| |j|dS rg  )r^   r   Zpreserve_formatr   rs   resizer   )rk   r   r   r*   r*   r+   	resize_as  s
    
r  )F)r   )r   r   FN)r   r   FN)r#   )r   NNr"   )N)rL   FF)N)r   )r   N)r   )r   )r   )r   )r"   r"   )r"   r"   F)r"   r"   )r   )N)Fr   )r   r\   N)r   r"   N)F)N)N)NN)NN)NNN)NNN)F)F)F)F)F)F)N)NN)NNN)N)r   r   FT)r   r   F)NN)N)NF)r   F)r"   r"   )N)r   )N(y  rF   r  rq  r  enumr   r   r   	itertoolsr   r   typingr   r   r	   r
   r   r   r   r   r^   Ztorch._primsr$  rn  Ztorch._prims_commonZ_prims_commonr?   Ztorch.nn.functionalrH  r  r   r   r   r   Ztorch._decompr   r  r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   Ztorch.utilsr   r>   Ztorch.utils._pytreer   r  ZDispatchKeyr    str__annotations__Z_opsr  rs   r!   r  r~  rH   r  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathr  Zpw_cast_for_int_to_realr  rP   rW   rX   ra   rt  ri   fillZScalarrm   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  Zpy_implZAutogradCUDA	Generatorr   r   r   r   r   r   r   r(   rl   r   r   r   rQ   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r	  slicer  r  r  r  r$  r%  r*  rW  re  rj  rp  rw  r{  ZCompositeImplicitAutogradZAutogradrz  r  r  r  r  ra  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zunsafe_chunkr  r
  r	  Zno_statsr  r  r  r  r  r  Z_fused_dropoutr  r%  r'  r   detachZliftZ
lift_freshr'  r*  r,  r.  r-  r/  r4  r6  Z_adaptive_avg_pool2drK  rQ  rR  rP  rc  rh  ri  rg  rj  r   ro  rt  rv  r  r  r  r  r{  Z_upsample_nearest_exact1dr  r  r  r  r  rx  r  r  r  r  r  r  r  r  r  r  Zrnn_tanhr   r  Zrnn_relur  r  r  r  r  r  r  r  Zlstmr  r  r  r  Zgrur  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  Z_unsafe_viewr  r  r  r   r!  r$  r&  r'  r+  r  r.  r7  r9  r>  r?  r@  rd  re  r  rg  ro  r   Zupsample_bicubic2dr  r  Zreflection_pad1dZreflection_pad2dZreflection_pad3dr  Zreplication_pad1dZreplication_pad2dZreplication_pad3dr  r  r  r  r(  r#  r  r  r  r  r  r  r  Z+_scaled_dot_product_flash_attention_for_cpur  r  r  r  r  r   r  rM  rI   r  r  r  r  r  r  r  Zaddbmm_ZaddbmmZaddmm_Zaddmv_Zbaddbmm_Zfill_Zgelu_r  Z
hardswish_Z	hardtanh_ZhardtanhZhardsigmoid___iand____and____ilshift__
__lshift__rY  rZ  Zindex_reduce_Zindex_reduce__ior____or____irshift__
__rshift____ixor____xor__Zleaky_relu_r   Zlogit_ZlogitZrelu_r  Zrenorm_ZrenormZround_r  Zscatter_r   Zscatter_add_Zscatter_addZscatter_reduce_Zscatter_reduceZsilu_r*   r*   r*   r+   <module>   s
   (
$  
      

      
	     
    

    
 *!	
   2            	P`    
    
%  ! 


"
  

"
     

W

N
R#






#

d
%$f(
    "
  




$




$  	 	  
  
.2)


 
 ?2

		

			 
    
	
I
5



   



$    '
  

   
w 
S


,


,



,

< 
J	

"


