U
    zh                    @   sD&  d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dl	mZmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dl
mZmZmZmZmZmZm Z m!Z! d d	l"m#Z#m$Z$m%Z%m&Z&m'Z' d d
l(m)Z)m*Z* d dl+m,Z- e	j.j/Z/e	j01dddZ2dd Z3edddZ4dd Z5dd Z6e3e/j7e/j8ge' ddde	j9ddfddZ:e3e/j;j<e/j;j=ge' dd Z>e3e/j?j<e/j?j=ge' dddd Z?e3e/j@e' d!d" Z@e3e/jAj<e/jAj=e/jBj<e/jBj=ge'd#d$d%d& ZCe3e/jDj<e/jDj=ge' d'd( ZDd)d* ZEe3e/jFj<e/jFj=ge' d+d, ZGe3e/jHj<e/jHj=ge' d-d. ZIe3e/jJjKdd/d0d1ZLe3e/jJj<e	jMdddd2d3d4ZNe3e/jOj<e/jOj=ge' e	jMdddd2d5d6ZPe3e/jOjQe/jOjRge' e	jMdddd2d7d8ZSe3e/jTj<e/jTj=ge' ddddd2d9d:ZUe3e/jVj<e/jVj=ge' d;d< ZWe3e/jXj<dd=d>ZYd?d@ ZZe3e/j[j<dAdB Z\e3e/j]deeeee ee^ ee	j_ dCdDdEZ`e3e/jadeeeee	j_ dFdGdHZbe3e/jcdIdIddJeeeeee	j_ dKdLdMZde3e/jede	je	jee ee ee	j_ efdNdOdPZge3e/jhj<dQdReeiee	je^efedSdTdUZje3e/jkj<dQdReeiee	je^efedSdVdWZle' e3e/jmj<dXdY Zne3e/joj<dddd dddZee^ee ee ee eiefed[d\d]Zpe3e/jqj<e/jqjrge' d^d_ Zse3e/jqjtdd`daZue3e/jvj<e/jvjrge' dbdc Zwe3e/jvjtddddeZxe3e/jyj<dfdg Zze3e/jyj=dhdi Z{e3e/j|j<djdk Z}e3e/j|j~dldm Ze3e/jj<dndo Ze3e/jj<ddddddpdqdrZe3e/jj<ddsdtZe3e/jj<ddudvZe3e/jj<ddwdxZe3e/jj<dydz Ze3e/jj~d{d| Zee^d}d~dZeee^dddZdee^efdddZdee^e^dddZeeefe^dddZde^eee^dddZe^dddZe3e/jj<e/jjge'dddee^efdddZe3e/jj<e/jj=ge' eedddZe3e/jge'ddedddZeedddZe3e/je' eeefedddZe3e/je' deeefedddZe3e/je' deefedddZe3e/je' deefedddZe3e/jj<deefefdddZe3e/jj<e/jj=ge' eeedddZe3e/jj<deefdddZe3e/jj<e/jj=ge'ddddddeefefeeeef dddZe3e/jj<e/jj=ge' ddeeeefedddZe3e/jj<e/jj=ge'ddddQdÜeefeeeef dĜddƄZe3e/jj<e/jj=ge'ddddQddȜeefefeeeef dɜdd˄Ze3e/jj<e/jj=ge' dQdd̜eeeefefed͜ddτZe3e/je'ddddeeefefeeeef dМdd҄Ze^eefeff dӜddՄZe3e/jj<e/jj=ge'dd׃dee^eeef dٜddۄZe3e/jj<e/jjge'ddddeeeeeef dޜddZe3e/jj<deefefee^ dddZeeeeei eei f dddZeeee^ eeef dddZeeefdddZe3e/jdQddddddeeefefee ee ee ee eeeeef d	ddZe3e/jj<e/jj=gdQdddeeefefefee edddZe3e/je'dddeeefefefeeef dddZe3e/jj<dd Ze3e/je' deeeefefedddZdd  Zdd Ze3e/jăe' dd Ze3e/jƃe' dd Zǐdd Ze3e/jɃe'd	d
d Ze3e/j˃e'd	dd Z̐dd Ze3e/j΃e' dd Ze3e/jЃe' dd Ze3e/jj<e/jje/jj<e/jjge'd	dd ZՐdd Ze3e/j׃e' dd Ze3e/jكe' dd Ze3e/jj<e/jje/jj<e/jjge'd	dd Ze3e/jރe' deeedd d!Ze3e/je' eeeeed"d#d$Ze3e/jj<e/jj=ge' dIdId%d&d'Ze3e/jj<e/jj=ge' dd/d(d)Ze3e/jj߃dd+d,Ze3e/jjdd-d.Ze3e/jj<dd/d0Ze3e/je' d1d2 Zd3d4 Zd5d6d7d8Zde	je	jeeei eif eeei eif eeei eif efeieeeei eif  d9d:d;Zd<d= Ze3e/jj<e	je	je	jeei eei eei efeei eid>	d?d@Ze	jjre	j01dAddZe3e	j.jjj<dBdC Ze3e	j.jjj<dDdE Ze	jjr"e	j01dFddZe3e	j.jj dGdH Ze	j01dIddZe3e	j.jjj<dJdK Ze3e	j.jjj<e3e	j.jjjdLdM Ze	j01dNddZ	e3e	j.j
jddRdSZdTdU Ze3e/jj<ddVdWZdXdY Ze3e/jj<dZd[ Ze3e/je' dd\d]Ze3e/je'd	d^d_ Ze3e/jj<d`da Ze3e/jj<dbdc Ze3e/jj<ddde Ze3e/je'd	dfdg Zee^dhdidjZe3e/j e'dkd$dldm Z!e3e/j"e'd	dndo Z#e3e/j$e'dkd$dpdq Z%e3e/j&e'd	drds Z'e3e/j(jddtduZ)e3e/j*j<e/j*j=ge' dvdw Z+e3e/j,j<e/j,j=ge' ddxeieidydzd{Z,e3e/j-je/j.jgd|d} Z/e3e/j0j<gd~d Z1e3e/j2j<e/j2j=ge' dIdId%ddZ3e3e/j4j<gdddddZ5e3e/j6j<gdddddZ7e3e/j8ge' dd Z9e3e/j:gdd Z;e3e/j<gdd Z=e3e/j>gdd Z?e3e/j@j<dd ZAe3e/jBe' dd ZCe3e/jDj<dddZEe3e/jFj<dd ZGdddZHe3e/jIj<e/jIj=ge' dddddZJe3e/jKj<e/jLj<gdd ZMe3e/jKjte/jKjNe/jLjte/jLjNe/jOj<e/jOjPge'd#d$dddZQe3e/jRj<dd ZSe3e/jTj<dd ZUe3e/jVj<dd ZWe3e/jXjYe/jZjYe/jXje/jZje/j[j<e/j\j<e/j]j<gdd Z^e3e/j_jYe/j`jYe/j_je/j`jgdddZae3e/jbj<e/jbjcgdd Zddd Zee3e/jfje/jfjYgdd Zge3e/jhje/jhjYgdd Zie3e/jjj<dd Zke3e/jlje/jljYgdd Zme3e/jnje/jnjYgdd Zoe3e/jpj<dd Zqe3e/jrj<e/jsj<gdddZte3e/jujYdd Zve3e/jwj<dd Zxe3e/jydd Zze3e/j{e' ddÄ Z|e3e/j}dĐdń Z~e3e/jj<ddƐdǄZe3e/jj<dȐdɄ Zddʐd˄Ze3e/jj<d̐d̈́ Zdΐdτ ZdАdф ZdҐdӄ ZdԐdՄ Zdeeieieieieieieieieieieieieieieieieieieie^efd֜dאd؄Zdِdڄ Zeeeieieieieieieieieieieieieieieieie^dۜdܐd݄Zdސd߄ Ze3e/jj<dd Ze3e/jj<dddZe3e/jj<dd Ze3e/je' dd Zdd Ze3e/je' dd Ze3e/je'dkd$dddZe3e/je'd	dd ZeedddZG dd deZeeeidddZe3e/jj<dd Ze3e/je' dd Ze3e/je'd	ddd Ze3e/jj<gdd  Ze3e/jj<dÐddZe3e/jjidd Ze3e/jj<dd Ze3e/jj<dĐddZdeieiefd	d
dZdd Zdd Ze3e/jj<dƐddZdǐddZdȐddZdd ZdɐddZdʐddZe3e/jj<dd Ze3e/jdd Ze3e/jje/jje/jje/jjge' dːd d!Ze3e/jje/jje/jje/jjgd̐d"d#Ze3e/jgdeeeeeeeeeieieefeeee d$d%d&Ze3e/jgdeeeeefee ee d(d)d*Ze3e/jgdeeeeeeeefee ee d+
d,d-Ze3e/jgdeeeeee eeeeeeef efee d.d/d0Ze3e/jgdeeeeeeeeeieieefeeee eei eei d1d2d3Ze3e/jgdeeeeee ee ee e	je	jeeeeeiefee eei efd4d5d6Ze3e/jj<gde	je	jee	j ee	j_ ee	j ee	j ee	j efd7d8d9Ze3e/jѐje/jѐjge' dԐd:d;Ze3e/jՐj҃dՐd<d=Ze3e/jj<e/jj=ge' ddd/d>d?Zؐd@dA ZِdBdC Ze3e/jj<e/jj<gdאdDdEZe3e/jj<e/jj<gdؐdFdGZe3e/jj<e/jj<gdeeeeie	jf  eeeie	jf  ee ee dHdIdJZe3e/jj<e/jj<gdڐdKdLZe3e/jj<e/jje/jjPe/jjgdېdMdNZe3e/jjdddOdPdQZdRdS Ze3e/jj<dܐdTdUZe3e/jj<dVdW Ze3e/jj<dXdY ZdZd[ Zd\d] Ze3e/jj<e/jj<gdݐd^d_Ze3e/jj<dސd`daZe3e/jj<dߐdbdcZe	jZddde Ze3e/jj<dfdg Ze3e/jj<dhdi Ze3e/jj<djdk Ze3e/jj<dldm Ze3e/jje/jjge' dddndodpZ e3e/jge' ddrdsZe3e/jj<e/jj<gddtduZe3e/jj<dvdw Ze3e/jj<e/jj=ge' ddxdyZe3e	j.j/jdzd{ Ze3e	j.j/j	d|d} Z	e3e/j
e' ddddd~ddZdd Ze3e/jdddZe3e/jdddZe3e/je' dddddZe3e/je' eieedddZe3e/jj<dd Ze3e/jedddZdd Zdd Zee/j ee/j ee/j ee/j ee/j ee/j  ee/j! ee/j" ee/j# ee/j$ ee/j% ee/j& ee/j' ee/j( ee/j) ee/j* ee/j+ ee/j, ee/j- ee/j. ee/j/ d dl(Z	d dl0Z	d dl1Z	dd Z2e2  dS (      N)Enum)ListOptionalSequenceTupleUnion)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)corresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDIntLikemake_contiguous_strides_forNumber
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_pytreeatenZIMPLMetac                    s    fdd}|S )Nc                    s$   t    fdd}t|  S )Nc                    s   t t|   d S N)r   r   opfn K/var/www/html/venv/lib/python3.8/site-packages/torch/_meta_registrations.pyregister0   s    z0register_meta.<locals>.wrapper.<locals>.register)r   pytreeZ	tree_map_)r(   r+   r%   r'   r*   wrapper-   s    zregister_meta.<locals>.wrapperr)   )r&   r-   r)   r%   r*   register_meta,   s    	r.   type_promotionc                    s>   t j|d| i\}  fdd|D }t| }t|dtjiS )Ntype_promotion_kindc                    s   g | ]}t | qS r)   )r   .0xresult_dtyper)   r*   
<listcomp>B   s     z$elementwise_meta.<locals>.<listcomp>r0   )utilsr   r    r   r   DEFAULT)r0   args_r)   r5   r*   elementwise_meta9   s    
r<   c                 C   s(   t jt jt jt jt jt ji}|| | S r$   )torchZ	complex32halfcfloatfloatcdoubledoubleget)dtypeZfrom_complexr)   r)   r*   toRealValueTypeM   s       rE   c                    s0   t tf|  t k fdd d S )Nc                      s   d d  S )Nzoutput with shape z# doesn't match the broadcast shape r)   r)   Zbroadcasted_shape
self_shaper)   r*   <lambda>Z       z)check_inplace_broadcast.<locals>.<lambda>)tupler   r=   _check)rG   Z
args_shaper)   rF   r*   check_inplace_broadcastV   s
    rL   Fc	           	         s  t tjr$t dkdd  t tjrHt dkdd  tdd fD rtt  d kr| qtt	 fdd npt t tj
sttt tfdd t tsttdkd	d  tjf|d
||dS )Nr   c                   S   s   dS Nz:linspace only supports 0-dimensional start and end tensorsr)   r)   r)   r)   r*   rH   n   rI   z(meta_linspace_logspace.<locals>.<lambda>c                   S   s   dS rM   r)   r)   r)   r)   r*   rH   s   rI   c                 s   s   | ]}t |tV  qd S r$   )
isinstancecomplex)r3   argr)   r)   r*   	<genexpr>v   s     z)meta_linspace_logspace.<locals>.<genexpr>c                      s   d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r)   r)   )default_complex_dtyperD   r)   r*   rH      rI   c                      s*   dt j dt  j dt j dS )Nz4received an invalid combination of arguments - got (, ))type__name__r)   )endstartstepsr)   r*   rH      s   c                   S   s   dS )Nz$number of steps must be non-negativer)   r)   r)   r)   r*   rH      rI   meta)rD   layoutdevice
pin_memoryrequires_grad)rN   r=   r
   rK   dimanyr8   r   Zget_default_dtypeis_complex_dtyperD   AssertionError_check_typer   empty)	rX   rW   rY   baserD   r\   r[   r]   r^   r)   )rR   rD   rW   rX   rY   r*   meta_linspace_logspace^   sH    

rf   c                    sN   t  jt jk fdd t |  dko6  dk dd  |  jS )Nc                      s   d j  S )Nz2take(): Expected a long tensor for index, but got rD   r)   indexr)   r*   rH      rI   zmeta_take.<locals>.<lambda>r   c                   S   s   dS )Nz*take(): tried to take from an empty tensorr)   r)   r)   r)   r*   rH      rI   )r=   rK   rD   long_check_indexnumel	new_emptyshape)selfri   r)   rh   r*   	meta_take   s    

rp   r_   c                   sh   j }j }t||kdd  t dko> dk fdd tjj}|S )Nc                   S   s   dS )Nz=linalg.cross: inputs must have the same number of dimensions.r)   r)   r)   r)   r*   rH      rI   zlinalg_cross.<locals>.<lambda>   c                      s"   d  d   d   S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and sizer)   r_   otherro   r)   r*   rH      s    )ndimr=   rK   rv   r   rn   rm   )ro   rx   r_   Zx_dZy_d	out_shaper)   rw   r*   linalg_cross   s    r{   c                 C   s$   t | d t| d tj| tjdS )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexr=   
empty_likecontiguous_formatro   r)   r)   r*   linalg_matrix_exp   s    

r   valuesindicesc                 C   sV   t j| j| j| jd}t j| j| jt jd}|  dkrN| jdkrNt|| j ||fS )Nr\   rD   r   )	r=   rd   rn   r\   rD   int64rl   ry   maybe_wrap_dim)ro   r_   r   r   r)   r)   r*   	cummaxmin   s
    r   c                 C   s   t || j t|  S r$   )r   ry   r=   r   
contiguous)ro   r_   r)   r)   r*   logcumsumexp   s    r   c                    s  |j }t|}|| }tt|}dd t|D }	|D ]}
d|	|
< q8g g  }}|D ]"}
|	|
 sl||
 qT||
 qT|| }t|}|  |d | }|j fdddd |||d   }||}dgt|j|d   }|	|}|
d}||d< |}tt|D ]}|||  ||d	 < q| 	|} d
d t|D }d	}|d	 }|dkr|| d ||| < ||||  9 }|d	8 }qTt||D ] }| d	||  ||| < q| |||  S )Nc                 S   s   g | ]}d qS )Fr)   r3   r;   r)   r)   r*   r7      s     z_exec_fft.<locals>.<listcomp>Tc                    s    |  S r$   r)   r4   Zself_stridesr)   r*   rH      rI   z_exec_fft.<locals>.<lambda>keyreverserq   r      c                 S   s   g | ]}d qS r   r)   r   r)   r)   r*   r7     s     )ry   lenlistrangeappendstridesortpermutern   Zreshaperv   
as_stridedstorage_offset)outro   	out_sizesr_   forwardry   Zsignal_ndim
batch_dimsZdim_permuteZis_transformed_dimdleftrightZ	batch_endtmpinputZbatched_sizes
batch_sizeZbatched_out_sizesiZout_stridesZbatch_numelr)   r   r*   	_exec_fft   sJ    






r   c                    sb   | j jst| j}| |}|s$|S |d d  }|   |j fdddd t|| |||}|S )Nc                    s    |  S r$   r)   r   r   r)   r*   rH     rI   zmeta_fft_c2c.<locals>.<lambda>Tr   )rD   
is_complexrb   rn   rm   r   r   r   )ro   r_   normalizationr   r   outputZsorted_dimsr)   r   r*   meta_fft_c2c  s    
r   c                 C   sR   | j jstt|  }|r<|d }|| d d }|||< | j|t| j dS )Nrq      r   rg   )rD   is_floating_pointrb   r   rv   rm   r8   r   )ro   r_   r   Zonesidedoutput_sizesZlast_dimZlast_dim_halfsizer)   r)   r*   meta_fft_r2c#  s     
r   )	generatorc                C   s   t |t| gS r$   )r   r=   Size)nr   r   r)   r)   r*   meta_randperm3  s    r   rD   r[   r\   r]   c                C   s   t j| ||||dS Nr   r=   rd   )r   rD   r[   r\   r]   r)   r)   r*   meta_randperm_default8  s        r   c                C   s   t j|||||dS r   r   )highrv   rD   r[   r\   r]   r)   r)   r*   meta_randintA  s        r   c                C   s   t j|||||dS r   r   )lowr   rv   rD   r[   r\   r]   r)   r)   r*   meta_randint_lowK  s        r   c                C   s   t j| ||||dS r   r   )rv   rD   r[   r\   r]   r)   r)   r*   meta_rand_default\  s        r   c                 C   s8   | j jstt|  }|||d < | j|t| j dS )Nrq   rg   )rD   r   rb   r   rv   rm   rE   )ro   r_   r   Zlastdimr   r)   r)   r*   meta_fft_c2rd  s    r   c                 C   sR   t | dkrtdt|trN|| |}|  | krNtj	||   | S )Nr   zQmore than one element of the written-to tensor refers to a single memory location)
r=   Z_debug_has_internal_overlapRuntimeErrorrN   r
   torv   r"   Zexpand_copydefault)ro   srcZnon_blockingZintermediater)   r)   r*   
meta_copy_m  s    
r   c                 C   sX   t |  }t |  }||  kr(dn|| ||  }||d ||| ||fS Nr   )r   rv   r   r_   insert)tensorr_   Zresult_sizesZresult_strides
new_strider)   r)   r*   inferUnsqueezeGeometry  s     r   c                 C   s0   t ||  d }t| |\}}| || | S r   )r   r_   r   as_strided_)ro   r_   Zg_sizesZ	g_stridesr)   r)   r*   meta_unsqueeze_  s    r   )r   weight_metabias_activation_opt	out_dtypec           	      C   s   t | j}|d k	r.|d|dks.td|d| dd ksJt|d|d< t| jdksntdd| df}|d k	r| jtjkr|tjkstd| j	||d kr| jn|d
||}|S )	Nr   zoutput size mismatchr   rq   r   z*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorrg   )r   rn   rv   rb   r   rD   r=   int8int32rm   r   )	r   r   r   r   r   r   r   Ztransposed_stridesr   r)   r)   r*   meta_sparse_structured_linear  s,    	

 r   )mat1	mat1_metamat2r   c                 C   s   t | jdkstt |jdks$tt |jdks6t| d|dd ksRt| d|dg}|d k	r|jtjkr|tjkstd|j||d kr|jn|d}|S )Nr   r   r   r   rg   	r   rn   rb   rv   rD   r=   r   r   rm   )r   r   r   r   r   r   r)   r)   r*   meta_sparse_structured_mm  s"    
r   r   )alphabetar   )r   r   r   r   r   c          	      C   s   t | jdkstdt |jdks(tt |jdks:tt |jdksLt| d|dkshtd|d|dd kst|d|dg}|d k	r|jtjkr|tjkstd|j||d kr|jn|d}|S )Nr   zEonly input broadcasted to columns of mat1 * mat2 product is supportedr   r   r   rg   r   )	r   r   r   r   r   r   r   r   r   r)   r)   r*   meta_sparse_structured_addmm  s4    
r   )compressed_Adense_Br   r   r   transpose_resultc                 C   s   |j tjtjtjtjhks"td| j |j ks6tdt|jdksLtd| j tjk}|r`dnd}|	d}|	d}	| 
 d	 ||  }
|d k	r|
|	dkst|d k	r|r|tjtjtjhkstd
|r|	|
fn|
|	f}|j||d}|S )Nz2_cslt_sparse_mm only supports fp16, bf16, and int8zinputs must have the same dtyper   z'_cslt_sparse_mm only supports 2d inputs
   	   r   r      z?out_dtype is only supported for i8i8->fp16, bf16, or i32 matmulrg   )rD   r=   float32float16bfloat16r   rb   r   rn   rv   rl   r   rm   )r   r   r   r   r   r   Zis_int8_input_typeZcompression_factorkr   moutput_shaperesultr)   r)   r*   meta__cslt_sparse_mm  s8    	

r   T)include_self)ro   r_   ri   sourcereducer   returnc                C   s   t j| t jdS Nr|   )r=   r   r   ro   r_   ri   r   r   r   r)   r)   r*   meta_index_reduce  s    
r   c                C   s   | S r$   r)   r   r)   r)   r*   meta_index_reduce_!  s    
r   c                 C   s.   t |  }|  dkr$| ||< | |S )Nr   )r   rv   r_   rl   rm   )ro   r_   ri   result_sizer)   r)   r*   meta_index_select/  s    r   )lengthsr   offsetsaxisunsafeinitial)datar   r   r   r   r   r   r   c          
         sj   |d k	rt d fdd}|d k	r0||jS |d k	r^|jd d |jd d f }	||	S tdd S )Nz?segment_reduce(): indices based reduction is not supported yet.c                    s(   t j| j d d   jdt jdS )Nr   rZ   rD   r\   r}   )r=   rd   rn   rD   r   )lengths_shaper   r   r)   r*   segment_reduce_lengths_tensorI  s    z:meta_segment_reduce.<locals>.segment_reduce_lengths_tensorrq   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorrn   r   )
r   r   r   r   r   r   r   r   r   r   r)   r   r*   meta_segment_reduce8  s    
r   c                 C   s
   |  dS Nr)   rm   r   r)   r)   r*   meta_max\  s    r  c                 C   s6   t | j|f}t| ||}| || j|tjdfS Nrg   r8   reduction_dimsrn   _compute_reduction_shaperm   r=   rj   ro   r_   keepdimr   r)   r)   r*   meta_max_dimb  s
    r	  c                 C   s
   |  dS r   r  r   r)   r)   r*   meta_minl  s    r
  c                 C   s6   t | j|f}t| ||}| || j|tjdfS r  r  r  r)   r)   r*   meta_min_dimr  s
    r  c                 C   s4   |   rt| j}nt| tjd\}}tj| |dS )Nr1   rg   )r   r   rD   r   r   INT_TO_FLOATr=   r   )ro   r6   r;   r)   r)   r*   
meta_angle|  s    
r  c                 C   s$   t ||  | j |t | S r$   )r=   Z_resize_output_rv   r\   copy_angle)ro   r   r)   r)   r*   meta_angle_out  s    r  c                 C   s   d S r$   r)   )valr)   r)   r*   assert_async  s    r  c                 C   s   d S r$   r)   )r  
assert_msgr)   r)   r*   assert_async_meta  s    r  c                 C   s   d S r$   r)   )sr)   r)   r*   
print_meta  s    r  rD   r[   r\   r]   r}   c                 C   s   t jdddS )Nr   rZ   r\   r   r  r)   r)   r*   make_dep_token  s    	r  c                 C   s4   ddl m} t| ttfr"td|| ||d d S )Nr   )constrain_range'Constraining SymFloat or Symbool is nyiminmax)%torch.fx.experimental.symbolic_shapesr  rN   r	   r   
ValueError)rv   r  r  r  r)   r)   r*   sym_constrain_range  s    r"  c                 C   s   t j| ||d |S Nr  )r"   r"  rv   r  r  	dep_tokenr)   r)   r*   functional_sym_constrain_range  s    r&  c                 C   s4   ddl m} t| ttfr"td|| ||d d S )Nr   )_constrain_range_for_sizer  r  )r   r'  rN   r	   r   r!  )rv   r  r  r'  r)   r)   r*   sym_constrain_range_for_size  s    r(  c                 C   s   t j| ||d |S r#  )r"   r(  r$  r)   r)   r*   'functional_sym_constrain_range_for_size  s    r)  c                 C   s   |S r$   r)   )r  r  r%  r)   r)   r*   functional_assert_async_meta  s    r*  ro   f_namec                 C   sX   |   dkst| d| d| dksTt| d| d d| d dd S )Nr   z3: The input tensor must have at least 2 dimensions.rq   z5: A must be batches of square matrices, but they are  by 	 matrices)r_   rb   rv   r+  r)   r)   r*   r~     s    
 r~   ro   Anamec                    s   t j jk fdd t j jk fdd t  d dk fdd t  ddk fdd d S )Nc                      s   dj  d j  dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r  r)   r1  ro   r)   r*   rH     s    z(linearSolveCheckInputs.<locals>.<lambda>c                      s   dj  d j  dS )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r3  rg   r)   r4  r)   r*   rH     s    rq   r-  c                      s   d  d d  d dS )Nz3A must be batches of square matrices, but they are r-  r.  rq   r/  ru   r)   r1  r)   r*   rH     s    c                      s:   d d  d d  d d d d d 
S )NzIncompatible matrix sizes for z: each A matrix is rq   r.  z but each b matrix is r-  ru   r)   r1  r2  ro   r)   r*   rH     s    )r=   rK   r\   rD   rv   r0  r)   r6  r*   linearSolveCheckInputs  s     


r7  tr,  allow_low_precision_dtypesc                    sZ   | j  t|  p|   fdd |sVt tjtjtjtjfk fdd d S )Nc                      s    d  S )Nz<: Expected a floating point or complex tensor as input. Got r)   r)   rD   r,  r)   r*   rH   	  rI   z(checkFloatingOrComplex.<locals>.<lambda>c                      s    d  S )Nz*: Low precision dtypes not supported. Got r)   r)   r;  r)   r*   rH     rI   )	rD   r=   rK   r   r   r@   rB   r?   rA   r8  r)   r;  r*   r     s    r   r1  r1  r,  arg_namec                    s"   t |  dk fdd d S )Nr   c                      s    d  dS )Nz: The input tensor z! must have at least 2 dimensions.r)   r)   r=  r,  r)   r*   rH     rI   zcheckIsMatrix.<locals>.<lambda>)r=   rK   r_   r<  r)   r>  r*   checkIsMatrix  s    
r?  r1  Br   r,  c                    sZ   t   t tr0 ddkn ddk fdd d S )Nr-  rq   c                      sH    drdnd d  d d  d d d d d d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r-  r4   rq   rt   rT   ru   r)   r1  rA  r,  r   r)   r*   rH   $  s    z#checkInputsSolver.<locals>.<lambda>)r~   r?  r=   rK   rv   r@  r)   rB  r*   checkInputsSolver  s    

*rC  r   fn_namer   r   result_namec                    s&   t jjk fdd d S )Nc                	      s$     d d dj  dj  	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r  r)   rE  r   r   rF  r)   r*   rH   1  s    z!checkSameDevice.<locals>.<lambda>)r=   rK   r\   rD  r)   rG  r*   checkSameDevice,  s    
rH  UPLOc                    s8      }tt dko&|dkp&|dk fdd d S )Nr   ULc                      s
   d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r)   r)   rI  r)   r*   rH   <  rI   zcheckUplo.<locals>.<lambda>)upperr=   rK   r   )rJ  ZUPLO_uppercaser)   rI  r*   	checkUplo8  s
    
rN  eigenvaluesZeigenvectorsrL  )r1  rJ  	compute_vc                 C   sp   t | d t| t| j}|r@| |}||t|dd n| dg}|  | j|t| j	d}||fS )Nzlinalg.eighFZ	row_majorr   rg   )
r~   rN  r   rn   rm   r   r   poprE   rD   )r1  rJ  rP  rn   Zvecsvalsr)   r)   r*   meta__linalg_eigh@  s    


rT  )r   r   c                 C   s@   t | d t| jr| jn
t| j}| j| jd d |dS )Nzlinalg.eigvalsrq   rg   r~   r8   ra   rD   r   rm   rn   )r   complex_dtyper)   r)   r*   meta__linalg_eigvalsW  s    


rW  r   c                 C   sX   t | d t| jr| jn
t| j}| j| jd d |d}| j| j|d}||fS )Nz
linalg.eigrq   rg   rU  )r   rV  r   Zvectorsr)   r)   r*   meta_linalg_eigc  s    


rY  )r   r   c                 C   s   | j jtjdddS )Nr|   r-  rq   )ZmTcloner=   r   	transpose)r   r)   r)   r*   cloneBatchedColumnMajorq  s    r\  )ro   r1  rM  r   c                 C   s   t | S r$   )r\  )ro   r1  rM  r)   r)   r*   _cholesky_solve_helperu  s    r]  c                    sP   t jdkfdd t  jdk fdd t d\}}t|||S )Nr   c                      s   d j  dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadry   r)   r   r)   r*   rH     rI   z cholesky_solve.<locals>.<lambda>c                      s   d j  dS )Nz-u should have at least 2 dimensions, but has r^  r_  r)   r5  r)   r*   rH     rI   cholesky_solve)r=   rK   ry   !_linalg_broadcast_batch_dims_namer]  )ro   r1  rM  Zself_broadcastedZA_broadcastedr)   r4  r*   r`  {  s    

  r`  )ro   rM  r   c                 C   s.   |   dkrtj| tjdS t| d t| S )Nr   r|   cholesky)rl   r=   r   legacy_contiguous_formatr~   r\  ro   rM  r)   r)   r*   rb    s    
rb  c                 C   s   t | d t| S )Ncholesky_inverse)r~   r\  rd  r)   r)   r*   re    s    
re  )r1  rM  check_errorsc                 C   sf   t | d t| d | j}t|}t|d}| |}||| | j|d|d  tjd}||fS )Nzlinalg.choleskyFr   r   rg   )	r~   r   rn   r   r   rm   r   r=   r   )r1  rM  rf  ZA_shapery   Z	L_stridesrL  infosr)   r)   r*   linalg_cholesky_ex  s    



rh  )r   taur   c                    s  t jdkdd  t ddkdd  t ddkdd  t jj dkfd	d jdkrjd d }jd d  t  |k fd
d t jjkfdd tdd t jjtjddjj	dS )Nr   c                   S   s   dS )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r)   r)   r)   r)   r*   rH     rI   z,linalg_householder_product.<locals>.<lambda>r-  rq   c                   S   s   dS )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r)   r)   r)   r)   r*   rH     rI   r   c                      s   dj  d j  S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r_  r)   r   ri  r)   r*   rH     s    c                      s
   d  S )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r)   r)   actual_batch_tau_shaper)   r*   rH     s    c                      s   dj  d j  S )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype rg   r)   rk  r)   r*   rH     s    z torch.linalg.householder_productri  FrQ  rv   r   rD   r\   )
r=   rK   ry   rv   rn   rD   rH  empty_stridedr   r\   )r   ri  Zexpected_batch_tau_shaper)   )rm  r   ri  r*   linalg_householder_product  sD    


rp  )r1  rf  c                 C   s^   t | d t| ddd | | j}|| jt| jdd | j| jd d tjd}||fS )Nzlinalg.inv_exF)r:  rQ  r-  rg   r~   r   rm   rn   r   r   r=   r   )r1  rf  rL  rg  r)   r)   r*   linalg_inv_ex_meta  s    
rr  LDpivotsinfo)	hermitianrf  )ro   rv  rf  r   c                C   st   t | d t| d tj| jt| jdd| j| jd}| j| jd d tj	d}| j| jd d tj	d}|||fS )Nztorch.linalg.ldl_factor_exFrQ  rn  rq   rg   r-  )
r~   r   r=   ro  rn   r   rD   r\   rm   int)ro   rv  rf  rs  rt  ru  r)   r)   r*   linalg_ldl_factor_ex_meta  s    

rx  )rv  )rs  rt  rA  rv  r   c                   s   t d td t d t jdk fdd jd d }t|jkfdd ttj	fdd tj	 j	k fdd t
 \}}tj|t|d	d
 j	 jdS )Nztorch.linalg.ldl_solver   c                      s   d j  dS )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r^  r_  r)   )rA  r)   r*   rH     s    z'linalg_ldl_solve_meta.<locals>.<lambda>rq   c                      s   d j  dS )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadrn   r)   rt  r)   r*   rH     s    c                      s   d j  S )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got rg   r)   r{  r)   r*   rH     rI   c                      s   dj  d j  S )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype rg   r)   )rA  rs  r)   r*   rH   #  rI   FrQ  rn  )r~   r   r7  r=   rK   ry   rn   r8   is_integer_dtyperD   _linalg_broadcast_batch_dimsro  r   r\   )rs  rt  rA  rv  Zexpected_pivots_shapeB_broadcast_sizer;   r)   )rA  rs  rt  r*   linalg_ldl_solve_meta  s6    







r  PrK  )pivot)r1  r  r   c          	         s   t  jdk fdd t j}|d }|d }t||}||d< |rV |}n dg}||d<  |}||d< ||d<  |}|||fS )Nr   c                      s   d j  dS )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: ry  rz  r)   r5  r)   r*   rH   3  rI   z linalg_lu_meta.<locals>.<lambda>r-  rq   r   )r=   rK   ry   r   rn   r  rm   )	r1  r  sizesr   r   r   r  rL  rK  r)   r5  r*   linalg_lu_meta.  s$    




r  LU)r  rf  )r1  r  rf  r   c          	         s   t  jdk fdd t j}|d }|d }t j|t|dd j jd}|	  t
|||d<  j|t jd	}|	   j|t jd	}|||fS )
Nr   c                      s   d j  dS )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: ry  rz  r)   r5  r)   r*   rH   Q  rI   z*linalg_lu_factor_ex_meta.<locals>.<lambda>r-  rq   FrQ  rn  rg   )r=   rK   ry   r   rn   ro  r   rD   r\   rR  r  rm   rw  )	r1  r  rf  r  r   r   r  rt  ru  r)   r5  r*   linalg_lu_factor_ex_metaJ  s&    


r  )r   adjoint)r  rt  rA  r   r  r   c                   s   t d tj jk fdd tjtjkdd  td t |d tddkdd  tjd d jkfdd t	 \}}tj
|t|| d	 j jd
}| dkr|s| r| }|S )Nztorch.linalg.lu_solvec                      s   dj  d j  dS )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type ry  rg   r)   )rA  r  r)   r*   rH   y  s    z&linalg_lu_solve_meta.<locals>.<lambda>c                   S   s   dS )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r)   r)   r)   r)   r*   rH     rI   zlinalg.lu_solverq   c                   S   s   dS )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr)   r)   r)   r)   r*   rH     rI   c                      s   d j  dS )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape ry  rz  r)   r{  r)   r*   rH     s    rQ  rn  r   )r   r=   rK   rD   rw  r~   rC  rv   rn   r}  ro  r   r\   rl   r   Zconj)r  rt  rA  r   r  r~  r;   r   r)   )rA  r  rt  r*   linalg_lu_solve_metak  s<    




r  )r  rt  unpack_dataunpack_pivotsr   c                    s   t  jdk fdd |r6t |jt jkdd  t j}|d }|d }t||}||d< |rr |}n dg}|r||d<  |}	||d< ||d<  |}
n dg}	 dg}
||	|
fS )Nr   c                      s   d j  dS )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: ry  rz  r)   r  r)   r*   rH     rI   z lu_unpack_meta.<locals>.<lambda>c                   S   s   dS )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr)   r)   r)   r)   r*   rH     s    r-  rq   r   )	r=   rK   ry   rD   r   r   rn   r  rm   )r  rt  r  r  r  r   r   r   r  rL  rK  r)   r  r*   lu_unpack_meta  s4    




r  )moder   c                    sR    dkrd}d}n8 dkr$d}d}n& dkr6d}d}nt d fdd ||fS )NreducedTZcompleteFrc                      s   d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r)   r)   r  r)   r*   rH     s    z _parse_qr_mode.<locals>.<lambda>r=   rK   )r  	compute_qr  r)   r  r*   _parse_qr_mode  s    
r  QRr  )r1  r  r   c                 C   s   t | d t| d t|\}}| jd }| jd }t||}|r|t| j}|rT|n||d< | |}||t|dd n| dg}t| j}	|s|s|n||	d< | |	}
|
|	t|	dd ||
fS )Nz	linalg.qrr-  rq   FrQ  r   )	r?  r   r  rn   r  r   rm   r   r   )r1  r  r  Zreduced_moder   r   r   ZQ_shaper  ZR_shaper  r)   r)   r*   linalg_qr_meta  s"    








r  sign	logabsdet)r1  r   c                 C   s   t | d t| dd | j}| |d d }| j|d d t| jd}tj|t|d| j| j	d}| j|d d tj
d}||||fS )Nzlinalg.slogdetFr-  rg   rn  rq   )r~   r   rn   rm   rE   rD   r=   ro  r   r\   r   )r1  rn   r  r  r  rt  r)   r)   r*   _linalg_slogdet  s    
r  )r1  full_matrices
compute_uvdriverc                 C   s   t | d t| d t| jd d }| jd }| jd }t||}|r|||rT|n|g }| |}	|	|t|dd ||r|n||g }
| |
}t| dk}||
t|
|d n| dg}	| dg}| j||g t	| j
d}|	||fS )	Nz
linalg.svdr-  rq   FrQ  cudar   rg   )r?  r   r   rn   r  rm   r   r   device_hintrE   rD   )r1  r  r  r  r   r   r   r   ZU_shaperK  ZV_shapeVZis_cudaSr)   r)   r*   _linalg_svd_meta  s$    






r  )arg1arg2r   c                 C   sn   | j d d }|j d d }t||}t|}|| d| dg7 }t|}||d|dg7 }||fS )Nr-  rq   )rn   r   r   rv   )r  r  Zarg1_batch_sizesZarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizer)   r)   r*   r}  <  s    
r}  )r  r  r2  r   c                 C   sV   |rt | || t| |\}}|| jkr,| n| |}||jkrD|n||}||fS r$   )r7  r}  rn   expand)r  r  r2  r  r  Zarg1_broadcastedZarg2_broadcastedr)   r)   r*   ra  L  s    ra  )r   rx   r   c                 C   s6   | j d d }|jdkp0| jd |jko0|j |k}|S )Nrq   r   )rn   ry   )r   rx   Zexpected_batched_rhs_shapevector_caser)   r)   r*   linalg_solve_is_vector_rhs^  s
    
r  )r   rf  r   r  rt  ru  )	r1  rA  r   rf  r   r  rt  ru  r   c                   sr  t  d t jjk fdd t }|r@dn}	t |	|d t|	 \}
}t|pl| dd  |r|
d d n|
}tj|t	|| jj
d} j} j}tj|t	|d j j
d} j|d d tjd} j|d d	 tjd}||||f}||||f}td
d |D rnt||D ]6\}}t||j ||j|  t||dd q6|S )Nzlinalg.solvec                      s   d j  dj  dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  ry  rg   r)   r1  rA  r)   r*   rH   u  s    z"_linalg_solve_ex.<locals>.<lambda>rq   c                   S   s   dS )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r)   r)   r)   r)   r*   rH     s    rn  Frg   r-  c                 s   s   | ]}|d k	V  qd S r$   r)   r2   r)   r)   r*   rQ     s     z#_linalg_solve_ex.<locals>.<genexpr>)	copy_fromcopy_toZexact_dtype)r   r=   rK   rD   r  	unsqueezerC  r}  ro  r   r\   rn   ry   rm   r   allzipr   r   r   r   )r1  rA  r   rf  r   r  rt  ru  r  B_ZB_broad_shaper;   Zresult_shapeZresult_rn   ry   ZLU_Zpivots_Zinfo_r   resr  or)   r  r*   _linalg_solve_exf  sL    



r  )r   unitriangularr   )r1  rA  rM  r   r  r   r   c          	      C   s   |d kr|  dg}t|ts"tt| ||d t|| d \}}|dd oV| }|rjt	||j
}n,t||j
r||ddj
 |dd |S )Nr   zlinalg.solve_triangularr-  rq   )rm   rN   r   rb   rC  ra  r[  is_contiguousZis_conjr   rn   r   Zresize_
transpose_)	r1  rA  rM  r   r  r   r  ZA_Zavoid_copy_Ar)   r)   r*   linalg_solve_triangular_meta  s    
r  solutioncloned_coefficient)ro   r1  rM  r[  r  r   c           	         s   t jdkfdd t  jdk fdd t d  jt jkrt \}}t j|t|ddj	j
d}t j|t|dd j	 j
d}n@ jt jks jt jkrt }d	g}nt dd
d  ||fS )Nr   c                      s   d j  dS )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r^  r_  r)   r   r)   r*   rH     s    z'triangular_solve_meta.<locals>.<lambda>c                      s   d j  dS )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r^  r_  r)   r5  r)   r*   rH     s    triangular_solveFrQ  rn  r   c                   S   s   dS )Nz+triangular_solve: Got an unexpected layout.r)   r)   r)   r)   r*   rH     rI   )r=   rK   ry   r7  r[   stridedr}  ro  r   rD   r\   
sparse_csr
sparse_bsrr   rm   )	ro   r1  rM  r[  r  Zself_broadcast_sizeZA_broadcast_sizer  r  r)   r4  r*   triangular_solve_meta  s8    	




r  c                 C   sp   t | d t| d | | jd d }| | j}|| jt| jdd | j| jd d tjd}|||fS )Nz
linalg.detr-  FrQ  rq   rg   rq  )r1  Zdetr  rt  r)   r)   r*   _linalg_det_meta  s    

r  )r   ri  rx   r   r[  r   c                    s  t jdkdd  t jdkdd  |r4dndt j jd kfdd t j jd kfdd t jd jd kd	d  t jj d
kfdd t jjkfdd jdkrFjd d }jd d t |kfdd jd d  t  |k fdd t jjkfdd t jjkfdd tdd tdd t jjtjddjjdS )Nr   c                   S   s   dS )Nz3torch.ormqr: input must have at least 2 dimensions.r)   r)   r)   r)   r*   rH     rI   zormqr.<locals>.<lambda>c                   S   s   dS )Nz3torch.ormqr: other must have at least 2 dimensions.r)   r)   r)   r)   r*   rH     rI   r-  rq   c                      s   d  dS )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r)   r)   left_size_conditionr)   r*   rH     rI   c                      s   d  dS )Nr  z"] must be equal to input.shape[-2]r)   r)   r  r)   r*   rH     rI   c                   S   s   dS )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r)   r)   r)   r)   r*   rH     rI   r   c                      s   dj  d j  S )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to rj  r_  r)   rk  r)   r*   rH     s    c                      s   dj  d j  S )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to rj  r_  r)   r   rx   r)   r*   rH   #  s    c                      s
   d  S )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r)   r)   rl  r)   r*   rH   .  s    c                      s
   d  S )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r)   r)   )actual_batch_other_shaper)   r*   rH   7  s    c                      s   d j  dj  S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype rg   r)   rk  r)   r*   rH   ?  s    c                      s   d j  dj  S )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype rg   r)   r  r)   r*   rH   F  s    ztorch.ormqrri  rx   FrQ  rn  )	r=   rK   ry   rn   rD   rH  ro  r   r\   )r   ri  rx   r   r[  Zexpected_batch_shaper)   )r  rm  r   r  rx   ri  r*   ormqr  sr    	  






r  c                   s   t td  k fdd j}| d k}|}| }|rftd|D ]}|o`|dk}qLn"td|D ]}|o|dk}qpt |p| fdd d S )Nr   c                      s   dd   dt  S )Nzpadding size is expected to be r   z, but got: r   r)   )r_   paddingr)   r*   rH   Z  rI   z,_padding_check_valid_input.<locals>.<lambda>r   r   c                      s    d d  d d  dj  S )Nz	Expected r   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: rz  r)   )r_   r   r)   r*   rH   o  s    )r=   rK   r   ry   r   rv   )r   r  r_   Z	input_dimZis_batch_modeZvalid_batch_modeZvalid_non_batch_moder   r)   )r_   r   r  r*   _padding_check_valid_inputW  s"    r  c                   s   d}d d}j dkr0d} d7  |d7 }t|dd |\|}   |rtk o|k  fdd tdkfdd j dkr|fS ||fS d S )	Nr   r   rs   rr   c                      s   d d d  dj  S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (rS   ) at dimension 
 of input rz  r)   dim_wr   pad_lpad_rr)   r*   rH     s    z_pad1d_common.<locals>.<lambda>c                      s   d  d S )Nz
input (W: z%) is too small. Calculated output W: r)   r)   )input_woutput_wr)   r*   rH     rI   r   )ry   rv   r  r=   rK   rm   )r   r  is_reflection	dim_planenbatchnplaner)   )r  r   r  r  r  r  r*   _pad1d_commonv  s0    




r  c                 C   s   t | |ddS NTr  r  r   r  r)   r)   r*   meta_reflection_pad1d  s    r  c                 C   s   t | |ddS NFr  r  r  r)   r)   r*   meta_replication_pad1d  s    r  c                   s   d |s t t|dkdd  jdkr2 d7  |\ }|  |rzt |k of|k  fdd t  k fdd jS )Nr   r   c                   S   s   dS )Nz padding size is expected to be 2r)   r)   r)   r)   r*   rH     rI   z(_pad1d_backward_common.<locals>.<lambda>rs   c                      s   d d d  dj  S r  rz  r)   r  r)   r*   rH     s    c                      s   d d   S Nz(grad_output width unexpected. Expected: , Got: ru   r)   r  grad_outputr  r)   r*   rH     rI   )r=   rK   r   ry   rv   rm   rn   )r  r   r  r  r  r)   )r  r  r   r  r  r  r*   _pad1d_backward_common  s$    

r  
grad_inputc                 C   s   t | ||ddS r  r  r  r   r  r)   r)   r*   meta_reflection_pad1d_backward  s    r  c                 C   s   t | ||ddS r  r  r  r)   r)   r*   meta_replication_pad1d_backward  s    r  c                   s8  dd d}d}t |dd j}|dkrNd}d7  d7  |d7 }|\	
|} 
   	 |rtk o	k 	fdd t
k ök  
fdd tdkpdkfd	d jd
kr"|fS ||fS d S )Nr   r   r   rr      c                      s   d d d  dj  S r  rz  r)   r  r)   r*   rH     s    z_pad2d_common.<locals>.<lambda>c                      s   d d d  dj  S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (rS   r  r  rz  r)   dim_hr   pad_bpad_tr)   r*   rH     s    c                      s   d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r)   r)   )input_hr  output_hr  r)   r*   rH     s    rs   r  ry   rv   r=   rK   rm   )r   r  r  Z
dim_slicesr  ry   r  r)   )r  r  r   r  r  r  r  r  r  r  r  r*   _pad2d_common  sB    



r  c                 C   s   t | |ddS r  r  r  r)   r)   r*   meta_reflection_pad2d  s    r  c                 C   s   t | |ddS r  r  r  r)   r)   r*   meta_replication_pad2d  s    r  c                    s   dd d}d}|j }| dkrB|d }d7  d7  |d7 }|\}}}}	|| }
|  }| }|| |	 || | tkfdd t k fdd ||j S )Nr   r   r   r  c                      s   d d   S r  ru   r)   r  r)   r*   rH   3  rI   z%meta_pad2d_backward.<locals>.<lambda>c                      s   d d   S Nz)grad_output height unexpected. Expected: r  ru   r)   r  r  r  r)   r*   rH   7  rI   )rn   r_   r=   rK   rv   rm   )r  ro   r  r  r  rG   r  r  r  r  r  r  r  r)   )r  r  r  r  r  r*   meta_pad2d_backward  s2    
r  c             	      s  ddd d}t |dd jdk}|rVd}d7 d7  d7  |d7 }|\
|}    
   	|r,tk oʈk fdd tk o
k 
fd	d tk ok  fd
d t	dkpJdkpJdk	fdd |r|||	fS |	fS d S )Nrs   r   r   r   rr      c                      s   d d d  dj  S r  rz  r)   r  r)   r*   rH   Y  s    z_pad3d_common.<locals>.<lambda>c                      s   d d d  dj  S r  rz  r)   r  r)   r*   rH   `  s    c                      s   d d d  dj  S )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (rS   r  r  rz  r)   )dim_dr   pad_bkpad_fr)   r*   rH   g  s    c                      s(   d  d d d d d S )Nz
input (D:  H: r  z%) is too small. Calculated output D: r)   r)   )input_dr  r  output_dr  r  r)   r*   rH   o  s    r  )r   r  r  r  Z
batch_moder  r  r)   )r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r*   _pad3d_common<  sP    





r   c                 C   s   t | |ddS r  r   r  r)   r)   r*   meta_reflection_pad3d{  s    r  c                 C   s   t | |ddS r  r  r  r)   r)   r*   meta_replication_pad3d  s    r  c                    s(  t t|dkdd  |jdks&tj|jks6tddd |jdkrdd7 d7  d7  |\}}}}}}| }	|}
|}|	| | |
| | || | t kfdd t kfd	d t  k fd
d ||jS )N   c                   S   s   dS )Nz padding size is expected to be 6r)   r)   r)   r)   r*   rH     rI   z%meta_pad3d_backward.<locals>.<lambda>rs   r   r   r  c                      s   d d   S r  ru   r)   r  r)   r*   rH     rI   c                      s   d d   S r  ru   r)   r  r)   r*   rH     rI   c                      s   d d   S )Nz(grad_output depth unexpected. Expected: r  ru   r)   )r  r  r  r)   r*   rH     rI   )r=   rK   r   ry   rb   rv   rm   rn   )r  r   r  r  r  r  r  r  r  r  r  r  r)   )r  r  r  r  r  r  r  r*   meta_pad3d_backward  s<    




r  r   )ro   pr   c                 C   sb   t |  dd  | d}|dkr<| dgjt jdS | ||d  d fjt jdS d S )Nc                   S   s   dS )Nz(_pdist_forward requires contiguous inputr)   r)   r)   r)   r*   rH     rI   z%meta__pdist_forward.<locals>.<lambda>r   r   r|   r   )r=   rK   r  rv   rm   r   rc  )ro   r  r   r)   r)   r*   meta__pdist_forward  s     
r  )gradro   r  pdistr   c                 C   s8   t | dd  t | dd  t j|t jdS )Nc                   S   s   dS )Nz._pdist_backward requires self to be contiguousr)   r)   r)   r)   r*   rH     rI   z&meta__pdist_backward.<locals>.<lambda>c                   S   s   dS )Nz/_pdist_backward requires pdist to be contiguousr)   r)   r)   r)   r*   rH     rI   r|   )r=   rK   r  r   rc  )r  ro   r  r	  r)   r)   r*   meta__pdist_backward  s      r
  )r   r   c          	         s     d}  d} d}|||ft  dkdd  t dkdd  tj j  kozjkn   fdd  j}j|d |d td koƈd kfd	d   S )
Nr   r   r   rs   c                   S   s   dS Nzbatch1 must be a 3D tensorr)   r)   r)   r)   r*   rH     rI   zmeta_baddbmm.<locals>.<lambda>c                   S   s   dS Nzbatch2 must be a 3D tensorr)   r)   r)   r)   r*   rH     rI   c                      s   dj  d j  dj  S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: rg   r)   )batch1batch2ro   r)   r*   rH     rI   c                	      s&   d d d d  d d  d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [rS   z] but got: [r   r   ].r)   r)   batch2_sizesbscontraction_sizer)   r*   rH     s    )rv   r  r=   rK   r_   rD   rn   rm   )	ro   r  r  r   r   dim1dim2Zdim3batch1_sizesr)   )r  r  r  r  r  ro   r*   meta_baddbmm  s&    


r  c                C   s   t |  S r$   r=   r   r   )ro   r   r)   r)   r*   meta_bernoulli  s    r        ?c                 C   s   | S r$   r)   ro   r  r   r)   r)   r*   meta_bernoulli_  s    r  c                 C   s   t |  S r$   r  r  r)   r)   r*   meta_bernoulli_p  s    r  c                 C   s6   t |
|  k dd  t j| t jd}t | |fS )Nc                   S   s   dS )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r)   r)   r)   r)   r*   rH     rI   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>rg   )r=   rK   r_   r   bool)ro   Zobserver_onZfake_quant_onZrunning_minZrunning_maxscaleZ
zero_pointZaveraging_constZ	quant_minZ	quant_maxZch_axisZper_row_fake_quantZsymmetric_quantmaskr)   r)   r*   $meta__fused_moving_avg_obs_fq_helper  s    
r"  c                    sn   t |  dkdd  t | dkdd  | j\ |j\t  k fdd | S )Nr   c                   S   s   dS )Nza must be 2Dr)   r)   r)   r)   r*   rH     rI   zmeta_mm.<locals>.<lambda>c                   S   s   dS )Nzb must be 2Dr)   r)   r)   r)   r*   rH     rI   c                	      s   d d  d d d	S )Nz/a and b must have same reduction dim, but got [rS   z] X [r  r)   r)   ZM1ZM2Nr  r)   r*   rH      rI   )r=   rK   r_   rn   rm   abr)   r#  r*   meta_mm  s    

r(  c                    s0   |r"t  fddtjD S tj S )Nc                 3   s$   | ]}| krj | nd V  qdS )r   Nrz  r3   r   dimsro   r)   r*   rQ   '  s     z+_compute_reduction_shape.<locals>.<genexpr>)rJ   r   ry   r8   compute_reduction_output_shapern   )ro   r+  r  r)   r*  r*   r  %  s    r  str)r   c                 C   s   t | tjjr| jjS dS d S )Nr  )rN   r=   Z_subclassesZ
FakeTensorZfake_devicerU   )r   r)   r)   r*   r  0  s    r  )input_tensorr   r   r  dilationis_transposedgroupsoutput_paddingc                 C   s  t t t t t t ddd}t t t t t t t ddd}	|jdd  }
| jdd  }|rb||jd  }n*|jd	 }|jd | | jd krtd
| jd	 |g}t|tr|gt| }nt|dkr|d	 gt| }t|tr|gt| }n t|dkr|d	 gt| }t|tr(|gt| }n t|dkrH|d	 gt| }d }|rt|trn|gt| }n&t|dkr|d	 gt| }n|}tt|D ]h}|r||	|| || || |
| || ||  n*|||| || || |
| ||  q|S )N)lnr  r   r   r  r   c                 S   s$   | d|  ||d   d | d S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r   r)   )r3  r  r   r   r  r)   r)   r*   _formulaA  s    z+calc_conv_nd_return_shape.<locals>._formula)r3  r  r   r   r  r&   r   c                 S   s(   | d | d|  ||d   | d S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r   r)   )r3  r  r   r   r  r&   r)   r)   r*   _formula_transposedR  s    z6calc_conv_nd_return_shape.<locals>._formula_transposedr   r   r   zInvalid channel dimensions)rw  rn   r   rN   r   r   r   r   )r.  r   r   r  r/  r0  r1  r2  r4  r5  kernel_sizer+  Zout_channelsZ	ret_shapeZoutput_padding_listr   r)   r)   r*   calc_conv_nd_return_shape7  sZ    



"r7  c                 C   s   t j| t jkS r$   r=   _prims_commonsuggest_memory_formatchannels_lasttenr)   r)   r*   is_channels_last  s    r>  )	r.  r   r   r   r  r/  r0  r2  r1  c	              	      sf    fdd}	t  ||||||r&|nd }
d}d} |dkrJd|
|<  |
}|j|	 d}|S )Nc                      s^   t  dkr$t str2tjS nt r2tjS  jtjdrFtjS  jtjdrZtjS d S Nr  r|   )r  r>  r=   r;  r  r   preserve_formatr)   r.  r   r)   r*   pick_memory_format  s    z%meta_conv.<locals>.pick_memory_formatr   r   r|   )r7  rv   rm   r   )r.  r   r   r   r  r/  r0  r2  r1  rB  	shape_outZinput_channels_dimZoutput_channels_dimr   r)   rA  r*   	meta_conv  s$    

rD  mkldnnc
              	   C   sH   t | ||||d|g }
| |
}tj}|  dkr8tj}|j|d}|S )NFr  r|   )r7  rm   r=   r;  r_   channels_last_3dr   )r.  r   r   r  r   r/  r1  attrscalars	algorithmrC  r   Zout_memory_formatr)   r)   r*   meta_mkldnn_convolution_default  s            
rJ  c                 C   s    |  | jd d |jd fS Nrq   r   rm   rn   )r.  r   r   rG  rH  rI  r)   r)   r*   meta_linear_pointwise_default  s    rM  mklc                 C   s    |  | jd d |jd fS rK  rL  )r.  Zpacked_weightZorig_weightr   r   r)   r)   r*   meta_mkl_linear  s    rO  onednnc              	   C   sJ   t | ||||	d|
d }|tjtjfks*t| j||d}|jtjd}|S )NFrg   r|   )r7  r=   r   r   rb   rm   r   r;  )r4   x_scalex_zpww_scalew_zpr   r   r  r/  r1  output_scaleoutput_zero_pointoutput_dtyperG  rH  rI  rC  r   r)   r)   r*   meta_qconv2d_pointwise	  s    
rY  c                 C   s>   t | j}|jd |d< |	tjtjfks,t| j||	d}|S )Nr   rq   rg   )r   rn   r=   r   r   rb   rm   )r4   rQ  rR  rS  rT  rU  r   rV  rW  rX  Zpost_op_nameZpost_op_argsZpost_op_algorithmr   r   r)   r)   r*   meta_qlinear_pointwise'	  s
    
rZ  	quantizedr)   r   r   c                 C   sr   t | |||||\}}}|  dkr.| dnd}	tj}
|  dkrP|||g}n|	|||g}tj|| j| j|
dS Nr  r   rs   r   )#max_pool2d_checks_and_compute_shaper_   rv   r=   r;  rd   rD   r\   r   r6  r   r  r/  	ceil_modenInputPlaneoutputHeightoutputWidthr  r}   rv   r)   r)   r*   meta_quantized_max_pool2dC	  s.         re  c                    s4   t   koj k fdd d S )Nc                      s8   d  d d dd   d dj   S )NzExpected a tensor of dimension z and tensor.size[z] == rS   zbut got : dimension z] = r_   rn   r)   r_   dim_sizerv   r   r)   r*   rH   e	  s   z check_dim_size.<locals>.<lambda>)r=   rK   r_   rn   )r   r_   rh  rv   r)   rg  r*   check_dim_sizeb	  s    ri  c                 C   sd  dd }|d|\}}	t t|dkdd  t|dkrF||	 }
}n.t|dkrf|d |d  }
}n|d	|\}
}|d
|\}}t |d kp|dkdd  |  dkr| dnd}| d}| d}| d}t||||
d|}t||	||d|}t| }t| ||	|
|||dd|||||| |  dkrB|||g}n||||g}t j	|| j
| j|dS )Nc                    sD   t t|dk fdd |d }t|dkr4|n|d }||fS )Nr   r   c                      s   d  dS )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr)   r)   r2  r)   r*   rH   w	  rI   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>r   r   r=   rK   r   r2  r  HWr)   rl  r*   unpackt	  s    

zmeta_avg_pool2d.<locals>.unpackr6  r   r   r   c                   S   s   dS NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr)   r)   r)   r)   r*   rH   	  rI   z!meta_avg_pool2d.<locals>.<lambda>r   r   r   r  c                   S   s   dS Nzdivisor must be not zeror)   r)   r)   r)   r*   rH   	  rI   r  r^  r-  rq   rs   r   )r=   rK   r   r_   rv   pooling_output_shaper8   r:  pool2d_shape_checkrd   rD   r\   )r   r6  r   r  ra  count_include_paddivisor_overriderq  kHkWdHdWpadHpadWr  rb  inputHeight
inputWidthrc  rd  r}   rv   r)   r)   r*   meta_avg_pool2dj	  sb    
	




r  c                 C   sj   t | ||||||dd|	|
|||| |  }|	}t|||d | t|||d | t|||d | d S )Nr   rs   r   )rw  r_   ri  )r   Z
gradOutputr  rz  r{  r|  r}  r~  r  rb  r  r  rc  rd  
mem_formatry   nOutputPlaner)   r)   r*   avg_pool2d_backward_shape_check	  s,    r  c                 C   s  t t|dkpt|dkdd  |d }t|dkr<|n|d }	t t|dkpjt|dkpjt|dkdd  t|dkr|n|d }
t|dkr|	nt|dkr|
n|d }t t|dkpt|dkdd  |d }t|dkr|n|d }t |d kp|dkdd  |j}| d	kr2|d
 nd}|d }|d }|d }t||||
d|}t||	||d|}t|}t|| |||	|
||||||||| t j	||j
|j|dS )Nr   r   c                   S   s   dS )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr)   r)   r)   r)   r*   rH   	  rI   z*meta_avg_pool2d_backward.<locals>.<lambda>r   c                   S   s   dS rs  r)   r)   r)   r)   r*   rH   	  rI   c                   S   s   dS )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr)   r)   r)   r)   r*   rH   	  rI   c                   S   s   dS rt  r)   r)   r)   r)   r*   rH   
  rI   r  r^  ru  r-  rq   r   )r=   rK   r   rn   r_   rv  r8   r:  r  rd   rD   r\   )ZgradOutput_r   r6  r   r  ra  rx  ry  rz  r{  r|  r}  r~  r  
input_sizer  rb  r  r  rc  rd  r  r)   r)   r*   meta_avg_pool2d_backward	  sj    "(
r  c                 C   s  t t|dkdd  |d }t|dkr0|n|d }t|dkrH|n|d }	t | pdt|dkdd  |sv|n|d }
|s|nt|dkr|
n|d }|s|	nt|dkr|
n|d }t t|dkdd  |d }t|dkr|n|d }t|dkr|n|d }t | jd	kd
d  t | p8|dkdd  | d}| d}| d}| d}| d}t||||
d|}t||||d|}t||	||d|}t| ||||	|
|||||ddd||||||ddd | jdkr| ||||fS | |||||fS d S )Nr   rs   c                   S   s   dS NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   :
  rI   z!meta_avg_pool3d.<locals>.<lambda>r   r   r   c                   S   s   dS NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   B
  rI   c                   S   s   dS NzBavg_pool3d: padding must be a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   J
  rI   r  r  c                   S   s   dS Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr)   r)   r)   r)   r*   rH   R
  rI   c                   S   s   dS rt  r)   r)   r)   r)   r*   rH   W
  rI   r^  ru  r-  rq   zavg_pool3d()T)check_input_sizer  )r=   rK   r   ry   rv   rv  pool3d_shape_checkrm   )r   r6  r   r  ra  rx  ry  kTrz  r{  dTr|  r}  padTr~  r  r  nslicesitimeiheightiwidthotimeoheightowidthr)   r)   r*   meta_avg_pool3d-
  s    
  





r  c                 C   s  t t|dkdd  |d }t|dkr0|n|d }	t|dkrH|n|d }
t | pdt|dkdd  |sv|n|d }|s|	nt|dkr|n|d }|s|
nt|dkr|n|d }t t|dkdd  |d }t|dkr|n|d }t|dkr|n|d }t |jd	kd
d  t | p8|dkdd  |d}|d}|d}|d}t||||d|}t||	||d|}t||
||d|}t|| |||	|
||||||||||||d ||jS )Nr  c                   S   s   dS r  r)   r)   r)   r)   r*   rH   
  rI   z*meta_avg_pool3d_backward.<locals>.<lambda>r   r   r   c                   S   s   dS r  r)   r)   r)   r)   r*   rH   
  rI   c                   S   s   dS r  r)   r)   r)   r)   r*   rH   
  rI   r  c                   S   s   dS r  r)   r)   r)   r)   r*   rH   
  rI   c                   S   s   dS rt  r)   r)   r)   r)   r*   rH   
  rI   r^  ru  r-  rq   zavg_pool3d_backward())	r=   rK   r   ry   rv   rv  avg_pool3d_backward_shape_checkrm   rn   )r  r   r6  r   r  ra  rx  ry  r  rz  r{  r  r|  r}  r  r~  r  r  r  r  r  Zotime_for_shape_checkZoheight_for_shape_checkZowidth_for_shape_checkr)   r)   r*   meta_avg_pool3d_backward
  st    
  




r  c                    sZ   t  jdkp jdk fdd  jd d t| }t }t j| j j	|dS )Nrs   r  c                      s   d j  S )Nz"Expected 3D or 4D tensor, but got rz  r)   r   r)   r*   rH   
  rI   z*meta_adaptive_avg_pool2d.<locals>.<lambda>r-  r   )
r=   rK   ry   rn   rJ   r8   r:  rd   rD   r\   )ro   output_sizer   r}   r)   r   r*   meta_adaptive_avg_pool2d
  s    

r  c                    s@   t  jdkp jdk fdd   jd d t| S )Nr  r  c                      s   d j  S )Nz"Expected 4D or 5D tensor, but got rz  r)   r   r)   r*   rH   
  rI   z*meta_adaptive_avg_pool3d.<locals>.<lambda>ru  )r=   rK   ry   rm   rn   rJ   )ro   r  r)   r   r*   meta_adaptive_avg_pool3d
  s
    
r  c                    s    j }td|D ]$t dk fdd qt|dkpH|dkfdd tj jk fdd tj}trtj}	j
j|d	S )
Nr   r   c                      s   d j  d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyrz  r)   )grad_outr   r)   r*   rH   
  s   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>rs   r  c                      s   d j  S )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got rz  r)   r   r)   r*   rH   
  rI   c                      s   dj  d j  S Nzexpected dtype z! for `grad_output` but got dtype rg   r)   )r  ro   r)   r*   rH   
  rI   r|   )ry   r   r=   rK   rv   rD   r   r>  r;  rm   rn   r   )r  ro   ry   r}   r)   )r  r   ro   r*   "meta__adaptive_avg_pool2d_backward
  s$    

r  c                 C   s   t | d tj|tjdS )NZadaptive_avg_pool3d_backwardr|   )!_adaptive_pool_empty_output_checkr=   r   rc  r  ro   r)   r)   r*   "meta__adaptive_avg_pool3d_backward  s    
r  )r  r=  c                    s<   j }td|D ]&tdk fdd qd S )Nr   r   c                      s     dj  d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  rz  r)   r=  r  r   r)   r*   rH     s    z3_adaptive_pool_empty_output_check.<locals>.<lambda>)ry   r   r=   rK   rv   )r  r=  ry   r)   r  r*   r    s    r  r   c                    s&  j }t|dkfdd td|D ]$ t dk fdd q(tt|dkdd  d}d}d}j d	krd}|d7 }|d }|\}}j d
kr|||f}|}	j|tjd}
|	|
fS ||||f}t	}|j
|d}	j|tjdj
|d}
|	|
fS d S )Nrs   r  c                      s   d j  S )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: rz  r)   rX  r)   r*   rH     rI   z*meta_adaptive_max_pool2d.<locals>.<lambda>r   r   c                      s   dj  d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  rz  r)   r   r   r)   r*   rH   #  s    r   c                   S   s   dS )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r)   r)   r)   r)   r*   rH   +  rI   r  rs   rg   r|   )ry   r=   rK   r   rv   r   rm   r   r8   r:  r   )r   r  ry   ZdimHsizeBsizeDosizeHosizeWrz   r   r   r}   r)   r  r*   meta_adaptive_max_pool2d  sD    







r  c                    sd    j }t|dk fdd t d tj jk fdd t}jj	|dS )Nr  c                      s   d j  S )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: rz  r)   r  r)   r*   rH   N  rI   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>adaptive_max_pool2d_backwardc                      s   dj  d j  S r  rg   r)   r  r   r)   r*   rH   U  rI   r|   )
ry   r=   rK   r  rD   r8   r:  rm   rn   r   )r  r   r   ry   r}   r)   r  r*   !meta_adaptive_max_pool2d_backwardH  s    



r  c                    s   j }t|dkfdd td|D ]$ t dk fdd q(tt|dkdd  d}d}d}|d	krd}|d7 }|}|\}}}|d
kr||||f}	n|||||f}	|	}
j|	tjd}|
|fS )Nr  c                      s   d j  S )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: rz  r)   rX  r)   r*   rH   b  rI   z*meta_adaptive_max_pool3d.<locals>.<lambda>r   r   c                      s   dj  d  dS )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  rz  r)   r  r)   r*   rH   g  s    rs   c                   S   s   dS )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r)   r)   r)   r)   r*   rH   o  rI   r  r  rg   )ry   r=   rK   r   rv   r   rm   r   )r   r  ry   ZdimDr  r  ZosizeTr  r  rz   r   r   r)   r  r*   meta_adaptive_max_pool3d\  s8    





r  c                 C   s   t | d ||jS )Nadaptive_max_pool3d_backward)r  rm   rn   )r  r   r   r)   r)   r*   !meta_adaptive_max_pool3d_backward  s    
r  c                 C   s   |d krt d| |S )Nz:cannot repeat_interleave a meta tensor without output_size)r   rm   )repeatsr  r)   r)   r*   meta_repeat_interleave_Tensor  s    r  c                 C   s:   | j jst|j jstt| j|j}| j|t| j dS r  )rD   r   rb   r   rn   rm   r   )realimagrz   r)   r)   r*   meta_complex  s    r  )
fill_value)rv   r  c                C   s   | j ||  ftjdS r  )rm   r_   r=   rj   )ro   rv   r  r)   r)   r*   nonzero_static  s    r  c              
      s  t tdd  g }tD ]\ d k	rt jt jt jt jt jfkdd  jt jt jfkr }t	|t 
j jkfdd tjD ]Ft 
j j  k fdd ||d qn
| q | q |t t	jkfdd dd lm} t|j t	jk r`d  qBd}d	}D ]J|dkrd k	rd}n*|dkrd krd
}nd k	rl qqld}|s<g }g }tD ](\ d k	r|  | qtD ](\ d kr|  | q||g }	g }
g }tD ]H\}d kr|rz|
j|  n|	j|  n
tj}qP|	| |
 S )Nc                   S   s   dS )Nz#at least one index must be providedr)   r)   r)   r)   r*   rH     rI   z#meta_index_Tensor.<locals>.<lambda>c                   S   s   dS )Nz?tensors used as indices must be long, int, byte or bool tensorsr)   r)   r)   r)   r*   rH     rI   c                      s   d j  S )N)too many indices for tensor of dimension r_  r)   r   r)   r*   rH     rI   c                	      s$   dj  d  dj  d  S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor rz  r)   )r   ri   jr   ro   r)   r*   rH     rI   r   c                      s   dj  dt  dS )Nr  z (got rT   )ry   r   r)   )r   ro   r)   r*   rH     rI   r   Fr   T)r=   rK   r  	enumeraterD   rj   rw  r   nonzeror   rk   ry   r   rn   r   selecttorch._refsZ_refsr   r    r   rm   )ro   r   r   r  refsstateZhas_contiguous_subspacer+  Ztransposed_indicesZbefore_shapeZafter_shapeZreplacement_shaper_   r)   )r   ri   r   r  r   ro   r*   meta_index_Tensor  s    












r  c                 C   sT   d }d }d }|
d r"|  | }|
d r8|  | }|
d rJ|  |}|||fS )Nr   r   r   rm   rv   )grad_output_input_weight_Zbias_sizes_optr   r  r/  Z
transposedr2  r1  output_maskZbackend_grad_inputZbackend_grad_weightZbackend_grad_biasr)   r)   r*   meta_convolution_backward	  s    
r  c                   s     d} d}| ||f} t  dkdd  t dkdd  t  d dk fdd t  d dk fd	d t|  d|ko|  d|kd
d  | |   S )Nr   r   rs   c                   S   s   dS r  r)   r)   r)   r)   r*   rH   -  rI   zmeta_addbmm.<locals>.<lambda>c                   S   s   dS r  r)   r)   r)   r)   r*   rH   .  rI   r   c                      s   d  d d d S )Nz8batch1 and batch2 must have same number of batches, got r   rt   ru   r)   r  r  r)   r*   rH   1  rI   c                
      s6   d  d d  d d d d d d	S )Nz#Incompatible matrix sizes for bmm (r   r4   r   rt   rT   ru   r)   r  r)   r*   rH   5  s    c                   S   s   dS )Nz.self tensor does not match matmul output shaper)   r)   r)   r)   r*   rH   <  rI   )rv   r  r=   rK   r_   rm   )ro   r  r  r   r   r  r  r)   r  r*   meta_addbmm'  s$    

r  )
grad_scale	found_infc       	            s4   | |||||fD ] t t t fdd qd S )Nc                      s   dt   S Nz'exponent must be a tensor list but got rU   r)   lr)   r*   rH   W  rI   z#meta__fused_adam_.<locals>.<lambda>r=   rK   rN   r   )ro   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer  r  r)   r  r*   meta__fused_adam_A  s
    
r  c       	            sZ   | |||||fD ] t t t fdd qdd }|| ||||||||fS )Nc                      s   dt   S r  r  r)   r  r)   r*   rH   q  rI   z"meta__fused_adam.<locals>.<lambda>c                 S   s   dd | D S )Nc                 S   s   g | ]}t |qS r)   r=   r   )r3   r9  r)   r)   r*   r7   u  s     z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>r)   )Ztensor_listr)   r)   r*   empty_like_listt  s    z)meta__fused_adam.<locals>.empty_like_listr  )ro   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r)   r  r*   meta__fused_adam[  s    
r  c                    s   t   dkdd  t  dkdd  t  jt jk fdd t jt jkfdd t  ddk fd	d  j ddft jd
S )Nr   c                   S   s   dS )Nza must be a 2D tensorr)   r)   r)   r)   r*   rH     rI   zmeta__int_mm.<locals>.<lambda>c                   S   s   dS )Nzb must be a 2D tensorr)   r)   r)   r)   r*   rH     rI   c                      s   d j  S )Nzexpected self to be int8, got rg   r)   )r&  r)   r*   rH     rI   c                      s   d j  S )Nzexpected mat2 to be int8, got rg   r)   )r'  r)   r*   rH     rI   r   r   c                
      s6   d  d d  d d d d d d	S )Nz'Incompatible matrix sizes for _int_mm (r   r4   r   rt   rT   ru   r)   r%  r)   r*   rH     s    rg   )r=   rK   r_   rD   r   rv   rm   r   r%  r)   r%  r*   meta__int_mm  s    



r  c                    sp   t   dkdd  t  jt jk fdd  d} d} j|d ||d  d	|d ft jd
S )Nr   c                   S   s   dS Nzw must be a 2D tensorr)   r)   r)   r)   r*   rH     rI   z2meta__convert_weight_to_int4pack.<locals>.<lambda>c                      s   d j  S Nzexpected w to be int32, got rg   r)   rS  r)   r*   rH     rI   r   r      r       rg   )r=   rK   r_   rD   r   rv   rm   )rS  Zinner_k_tilesr   r   r)   r  r*    meta__convert_weight_to_int4pack  s    




r  c                    s   t  dkdd  t   dkdd  t jt jt jt jfkfdd t  jt jk fdd j	d 	dd	 jd
S )Nr   c                   S   s   dS Nzx must be a 2D tensorr)   r)   r)   r)   r*   rH     rI   z*meta__weight_int4pack_mm.<locals>.<lambda>r  c                   S   s   dS )Nzw must be a 4D tensorr)   r)   r)   r)   r*   rH     rI   c                      s   d j  S Nz#expected x to be f32/f16/bf16, got rg   r)   r   r)   r*   rH     rI   c                      s   d j  S r  rg   r)   r  r)   r*   rH     rI   r   r  rg   )
r=   rK   r_   rD   r   r   r   r   rm   rv   )r4   rS  Zq_group_sizeZq_scale_and_zerosr)   rS  r4   r*   meta__weight_int4pack_mm  s    


r  c                    s   t  dkdd  t jt jt jt jfkfdd t   dkdd  t  jt jk fdd j	d 	djdS )	Nr   c                   S   s   dS r  r)   r)   r)   r)   r*   rH     rI   z*meta__weight_int8pack_mm.<locals>.<lambda>c                      s   d j  S r  rg   r)   r   r)   r*   rH     rI   c                   S   s   dS r  r)   r)   r)   r)   r*   rH     rI   c                      s   d j  S )Nzexpected w to be int8, got rg   r)   r  r)   r*   rH     rI   r   rg   )
r=   rK   r_   rD   r   r   r   r   rm   rv   )r4   rS  Zq_scalesr)   r  r*   meta__weight_int8pack_mm  s    


r  c           	         s  t  dkfdd t  dkfdd t ddkfdd t tjdd  t tjdd  t |d	kd
d  t  dk fdd d}d}jd d }jd d }tt 	||}|
||g |S )Nr   c                      s   d    dS )Nz1cdist only supports at least 2D tensors, X1 got: Drr   r)   )x1r)   r*   rH     rI   z$meta_cdist_forward.<locals>.<lambda>c                      s   d    dS )Nz1cdist only supports at least 2D tensors, X2 got: r  rr   r)   )x2r)   r*   rH     rI   rq   c                      s   d  d d d S )Nz4X1 and X2 must have the same number of columns. X1: rq   z X2: ru   r)   )r  r  r)   r*   rH     rI   c                   S   s   dS )Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS )Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r)   r)   r)   r)   r*   rH     rI   r   c                   S   s   dS )Nz)cdist only supports non-negative p valuesr)   r)   r)   r)   r*   rH     rI   Nr   r   c                      s
   d  S )Nz%possible modes: None, 1, 2, but was: r)   r)   )compute_moder)   r*   rH     rI   r-  )r=   rK   r_   rv   r8   is_float_dtyperD   rn   r   broadcast_shapesextendrm   )	r  r  r  r  r1r2batch_tensor1batch_tensor2r   r)   )r  r  r  r*   meta_cdist_forward  s@    








r  c                 C   s   |j d }|j d }|j d }|j d d }|j d d }	tt||	}
|
 }|||g t|
}|dks|dks|dks|dkrt|S |t|j kr|	|}tj
|tjdS )Nrq   r-  r   r|   )rn   r   r=   r  copyr  mathprod
zeros_liker  r   r   )r  r  r  r  Zcdistc1r  r  r  r  r  Ztensor1_expand_sizeZbatch_productr)   r)   r*   meta_cdist_backward  s    



 

r  c	                    sF  t  jt jt jfk fdd t jt jt jfkfdd t tjfdd d}	|rt |	dkdd  |	d8 }	|	d}
t	d\}}}d k	r"t ||kd	d  t jjkfd
d t j
dkfdd t    k fdd fdddd fdd}tdkr  d}  }||kr |	d}n
 d}n||
|}|||fks|sΈ d}n
d}|	}jd }||kr,|rt |dkdd  |d8 }|jd }n| }|
|||fS )Nc                      s   d j  S )Nz(expected indices to be long or int, got rg   r)   r   r)   r*   rH     rI   z$meta_embedding_bag.<locals>.<lambda>c                      s   d j  S )Nz(expected offsets to be long or int, got rg   r)   )r   r)   r*   rH     rI   c                      s   d j  S )Nz/expected weight to be floating point type, got rg   r)   )r   r)   r*   rH     rI   r   r   c                   S   s   dS Nz1include_last_offset: numBags should be at least 1r)   r)   r)   r)   r*   rH   !  rI   rs   c                   S   s   dS )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r)   r)   r)   r)   r*   rH   +  rI   c                      s   dj  d j  dS )Nzexpected weight (z) and per_sample_weights (z) to have same dtyperg   r)   )per_sample_weightsr   r)   r*   rH   /  rI   c                      s   d j  dS )Nz1expected per_sample_weights to be 1D tensor, got r  r_  r)   )r  r)   r*   rH   3  rI   c                      s   d   d    dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (rT   rl   r)   )r   r  r)   r*   rH   7  s    c                    s    | ||o| ddkS Nr   r   r   r   r   r   padding_idx)is_fast_path_index_selectr)   r*   is_fast_path_index_select_scale=  s    z;meta_embedding_bag.<locals>.is_fast_path_index_select_scalec                 S   s<   | j tjks| j tjko:| ddko:|ddko:|dk S Nr   r   )rD   r=   r@   r>   r   )r   r   r  r)   r)   r*   r  B  s    z5meta_embedding_bag.<locals>.is_fast_path_index_selectc                    s&   |d k	r| |||S  | ||S d S r$   r)   r  )r  r  r)   r*   is_fast_pathJ  s    z(meta_embedding_bag.<locals>.is_fast_pathcpuc                   S   s   dS r  r)   r)   r)   r)   r*   rH   d  rI   )r=   rK   rD   rj   rw  r8   r  rv   rm   r   ry   rl   r  rn   )r   r   r   scale_grad_by_freqr  sparser  Zinclude_last_offsetr  Znum_bagsr   MODE_SUM	MODE_MEANMODE_MAXr  
offset2bagbag_sizemax_indicesZfast_path_sumZnumBagsr)   )r   r  r  r   r  r   r*   meta_embedding_bag  s~    












r  c                 G   s@   t | ||f| \}}}}t|dkr4|| }||||fS )Nr  )r  r  rm   rv   )r   r   r   r:   r   r  r  r  r)   r)   r*   meta_embedding_bag_forward_onlym  s      r  c                 C   s.   |r|S | j js| j jr| j S |r(tjS | j S r$   )rD   r   r   r=   rj   )r   rD   promote_int_to_longr)   r)   r*   _get_reduction_dtypew  s    r  rg   c                C   s6   t | |dd}t| j|}t| ||}| j||dS )NT)r  rg   )r  r8   r  rn   r  rm   )r   r+  r  rD   rX  r   r)   r)   r*   meta_nansum  s    r  c                 C   s$   t | jtt|  }| |S r$   )r8   r,  rn   rJ   r   r_   rm   )r   r   r)   r)   r*   meta_median  s
     r  c                 C   sL   t | dkrtd t| j|f}t| ||}| || j|tjdfS )Nr  zmedian CUDA with indices outputrg   )	r  r8   alert_not_deterministicr  rn   r  rm   r=   rj   )r   r_   r  r   r)   r)   r*   meta_median_mode_dim  s    
r!  c                 C   s   | S r$   r)   r   r)   r)   r*   meta_logical_not_  s    r"  c                    sd   t t|  kdd  t|   }d| t| j   fddttD }| |S )Nc                   S   s   dS )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr)   r)   r)   r)   r*   rH     rI   zmeta_repeat.<locals>.<lambda>r\  c                    s   g | ]} | |  qS r)   r)   r)  Zpadded_sizer  r)   r*   r7     s     zmeta_repeat.<locals>.<listcomp>)r=   rK   r   r_   rJ   rn   r   rm   )ro   r  Znum_new_dimensionsZtarget_sizer)   r#  r*   meta_repeat  s    r$  c                 C   s   | S r$   r)   r   r)   r)   r*   
meta_zero_  s    r%  c                 C   s   t |tjrt| j|j | S r$   rN   r=   r
   rL   rn   ro   rx   r)   r)   r*   meta_binop_inplace  s    r(  c                 C   s   t |tjrt| j|j | S r$   r&  )ro   rx   r   r)   r)   r*   meta_binop_inplace_alpha  s    	r)  c                 K   s   t | tjdS Nr/   )r<   r   r9   )ro   kwargsr)   r)   r*   
meta_round  s     r,  c                    sj   t tj fdd tt jrJt tj fdd nt tt fdd d S )Nc                      s     dj  S )Nz7: Expected input tensor to have an integral dtype. Got rg   r)   )rE  ro   r)   r*   rH     rI   z#shift_dtype_check.<locals>.<lambda>c                      s     dj  S )Nz6: Expected shift value to have an integral dtype. Got rg   r)   rE  r  r)   r*   rH     rI   c                      s     d S )Nz): Expected shift value to be an int. Got r)   r)   r-  r)   r*   rH     rI   )r=   rK   r8   r|  rD   rN   r
   r   rE  ro   r  r)   r.  r*   shift_dtype_check  s    

r/  c                 C   s   t d| | t| |tjdS )Nrshiftr/   r/  r<   r   r9   r'  r)   r)   r*   meta_rshifts  s      r2  c                 C   s   t d| | t| |tjdS )Nlshiftr/   r1  r'  r)   r)   r*   meta_lshifts  s      r4  c                 C   s   |  | jS r$   rL  r   r)   r)   r*   	meta_zero  s    r5  c                 C   s   | S r$   r)   ro   r  r)   r)   r*   
meta_fill_  s    r7  c                 C   s
   t | S r$   r  r6  r)   r)   r*   	meta_fill  s    r8  c                 C   s   | S r$   r)   r   r)   r)   r*   
meta_relu_  s    r9  c                 C   s
   t | S r$   r  ro   r   r   
accumulater)   r)   r*   meta_index_put  s    r<  c                 C   s   t | j|j | S r$   )rL   rn   )ro   r!  valuer)   r)   r*   meta_masked_fill_$  s    r>  c                 C   s    |  |  jt| d}|S r   )rm   rv   r   r8   r:  )ro   r!  r   Zmasked_scaler)   r)   r*   meta__masked_scale*  s    r?  c                 C   s:   t |jt jt jfkdd  t | j|jkdd  | S )Nc                   S   s   dS )NzMask must be bool or uint8r)   r)   r)   r)   r*   rH   5  rI   z&meta_masked_scatter_.<locals>.<lambda>c                   S   s   dS )Nzdmasked_scatter: expected self and source to have same dtypes but got {self.dtype} and {source.dtype}r)   r)   r)   r)   r*   rH   9  rI   )r=   rK   rD   r  uint8)ro   r!  r   r)   r)   r*   meta_masked_scatter_2  s     
rA  c                 C   s*   t | |\} }tj| tjd}t|||S r   )r    r=   r   r   rA  )ro   r!  r   r   r)   r)   r*   meta_masked_scatter?  s    rB  c                 C   s
   |  |S r$   r  )ro   r!  r  r)   r)   r*   meta_masked_scatter_backwardG  s    rC  c                 C   s   | S r$   r)   r:  r)   r)   r*   meta_index_put_L  s    rD  c                 C   s   |  | jS r$   )viewrn   r   r)   r)   r*   
meta_aliasQ  s    rF  c                    s   t |  dkdd  t | dkdd  |  }|  |d |d |d } d }||ft  d ko d k fdd |}|sd k	rt  dkd	d  t  kfd
d |S )Nrs   c                   S   s   dS r  r)   r)   r)   r)   r*   rH   W  rI   z)common_meta_baddbmm_bmm.<locals>.<lambda>c                   S   s   dS r  r)   r)   r)   r)   r*   rH   X  rI   r   r   r   c                	      s&   d d d d  d d  d	S r  r)   r)   r  r)   r*   rH   e  rI   c                   S   s   dS )Nzself must be a 3D tensorr)   r)   r)   r)   r*   rH   n  rI   c                      s   d  d   S )Nz*Expected an input tensor shape with shape z but got shape: ru   r)   )r  self_baddbmmr)   r*   rH   q  rI   )r=   rK   r_   rv   rm   )r  r  Zis_bmmrG  r  Zres_rowsZres_colsr   r)   )r  r  r  r  rG  r*   common_meta_baddbmm_bmmV  s*    


rH  c                 C   s   t | |dS )NT)rH  )ro   r   r)   r)   r*   meta_bmmw  s    rI  c                 C   s<   | | }| | }|dkr8t |dk t |dk kr8|d8 }|S r	  )r  )r4   yqr  r)   r)   r*   div_rtn|  s
     rL  c                 C   sZ   t | | | ||d   d |r(|d nd |d }|rV|d | | | krV|d8 }|S r  )rL  )	inputSize
kernelSizer  r  r   r/  ra  Z
outputSizer)   r)   r*   pooling_output_shape_pad_lr  s*    
	rO  c                    sl   t |dkdd  t dkfdd t d   d d k fdd t| | |S )Nr   c                   S   s   dS )Nzstride should not be zeror)   r)   r)   r)   r*   rH     rI   z&pooling_output_shape.<locals>.<lambda>c                      s
   d  S )Nz'pad must be non-negative, but got pad: r)   r)   )padr)   r*   rH     rI   r   r   c                      s   d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r)   r)   r/  rN  rP  r)   r*   rH     s    )r=   rK   rO  )rM  rN  rP  r   r/  ra  r)   rQ  r*   rv    s          rv  c              	      sR     }tdkodkdd  t|dko:|dkdd  t|dkoV|dkdd   ddkoz ddk}|tjkrt|dko|o d	dkd
d  nDt|d	kr̈ ddkr|p|dko|o d	dk fdd td 
kod 	k	
fdd tdko6dkfdd d S )Nr   c                   S   s   dS )NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r)   r)   r)   r)   r*   rH     rI   z$pool2d_shape_check.<locals>.<lambda>c                   S   s   dS )Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS )Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r)   r)   r)   r)   r*   rH     rI   r   r   r  rs   c                   S   s   dS )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r)   r)   r)   r)   r*   rH     rI   c                      s   d    S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: ru   r)   rX  r)   r*   rH     rI   c                      s   d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r)   r)   )rz  r{  r~  r  r)   r*   rH     rI   c                      s*   d d  d d d d dS NzGiven input size: (r4   z). Calculated output size: (z). Output size is too smallr)   r)   )r  r  rb  r  rc  rd  r)   r*   rH     rI   )r_   r=   rK   rv   r;  )r   rz  r{  r|  r}  r~  r  	dilationH	dilationWrb  r  r  rc  rd  r}   ry   Z
valid_dimsr)   )r   r  r  rz  r{  rb  r  rc  rd  r~  r  r*   rw    sB    

rw  )r   r  r  rz  r{  r  r|  r}  pTpHpW	dilationTrS  rT  r  r  r  r  r  r  rE  r  c              
      s  	j }tdko dko dkfdd tdkoLdkoL dk fdd tdkoxdkoxdkfdd t|dk	fdd t|D ]8|dkrdkrqt	dk	fd	d q|r"t
ko
ko
k
fd
d td koLd koLd kfdd tdkodkodk
fdd d S )Nr   c                      s   d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r)   r)   )rz  r  r{  r)   r*   rH     s    z$pool3d_shape_check.<locals>.<lambda>c                      s   d d  d S )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r)   r)   )r|  r  r}  r)   r*   rH     s    c                      s   d d  d S )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r)   r)   )rS  rX  rT  r)   r*   rH     s    r  c                      s     dj  S )Nz/: Expected 4D or 5D tensor for input, but got: rz  r)   )rE  r   r)   r*   rH     rI   r  c                      s     dj  d dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)rn   rv   r)   rE  r   r   r)   r*   rH   #  s    c                      s*   d d  d d d d dS )Nzinput image (T: r  r  z ) smaller than kernel size (kT:  kH:  kW: rT   r)   r)   )r  r  r  rz  r  r{  r)   r*   rH   -  s    r   c                      s(   d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r[  rZ  z padT: z padW: z padH: r)   r)   )rz  r  r{  rV  rU  rW  r)   r*   rH   5  s    r   c                      s6   d d d  d d d d d dS rR  r)   r)   )r  r  r  r  r  r  r  r)   r*   rH   =  s    )ry   r=   rK   r   rv   )r   r  r  rz  r{  r  r|  r}  rU  rV  rW  rX  rS  rT  r  r  r  r  r  r  rE  r  ry   r)   )r|  r  r}  rS  rX  rT  rE  r   r  r   r  r  rz  r  r{  r  r  r  r  rV  rU  rW  r*   r    sJ    	&r  c                 C   s   | j }t| |||||||	|
|||||||||||| t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | d S )Nr  rs   r   r   ry   r  ri  )r   r  r   r  r  rz  r{  r  r|  r}  rU  rV  rW  rX  rS  rT  r  r  r  r  r  r  rE  ry   r)   r)   r*   max_pool3d_backward_shape_checkE  s@    r]  )r   r  r  r  rz  r{  r  r|  r}  rU  rV  rW  r  r  r  r  r  r  rE  c                 C   s   | j }t| ||||||||	|
|ddd|||||||d t|||d | t|||d | t|||d | t|||d | d S )Nr   Tr  rs   r   r\  )r   r  r  r  rz  r{  r  r|  r}  rU  rV  rW  r  r  r  r  r  r  rE  ry   r)   r)   r*   r    s:    r  c                 C   sB  dd }|d|\}}t t|dkdd  t|dkrF|| }	}
n|d|\}	}
|d	|\}}|d
|\}}| d}| d}| d}t| }|t jkrt |  dkdd  n4|t jkrt |  dkdd  nt ddd  t	||||	||}t	||||
||}t
| |||	|
|||||||||| |||fS )Nc                    sD   t t|dk fdd |d }t|dkr4|n|d }||fS )Nrj  c                      s   d  dS )Nzmax_pool2d: rk  r)   r)   rl  r)   r*   rH     rI   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>r   r   rm  rn  r)   rl  r*   rq    s    

z3max_pool2d_checks_and_compute_shape.<locals>.unpackr6  rr  c                   S   s   dS )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr)   r)   r)   r)   r*   rH     rI   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>r   r   r  r/  ru  r-  rq   r  c                   S   s   dS )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr)   r)   r)   r)   r*   rH     rI   r  c                   S   s   dS )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr)   r)   r)   r)   r*   rH     rI   Fc                   S   s   dS )Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr)   r)   r)   r)   r*   rH     rI   )r=   rK   r   rv   r8   r:  r;  r_   r   rv  rw  )r   r6  r   r  r/  ra  rq  rz  r{  r|  r}  r~  r  rS  rT  rb  r  r  r}   rc  rd  r)   r)   r*   r_    sb    	








r_  c                    s   t |||||\}tj jk fdd |jfdd}	|	  |	| t}
tjjjj	|
dS )Nc                      s   dj  d j  S )NzExpected dtype z  for `gradOutput` but got dtype rg   r)   r  r)   r*   rH     rI   z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>c                    s:   t | d   t | d  t | d  d S )Nrs   r   r   )ri  )r9  )r  ry   rc  rd  r)   r*   _check_dim_size  s    z>meta_max_pool2d_with_indices_backward.<locals>._check_dim_sizer   )
r_  r=   rK   rD   ry   r8   r:  rd   rn   r\   )r  ro   r6  r   r  r/  ra  r   rb  r^  r}   r)   )r  r  ry   rc  rd  ro   r*   %meta_max_pool2d_with_indices_backward   s8         

r_  c                 C   s   t | |||||\}}}|  dkr.| dnd}	t| }
|  dkrT|||g}n|	|||g}tj|| j| j|
dtj|tj	| j|
dfS r]  )
r_  r_   rv   r8   r:  r=   rd   rD   r\   r   r`  r)   r)   r*   meta_max_pool2d_with_indices,  s<         
r`  c           
   	      s   t jdkfdd j}t|d |D ], t  dkd  d  d q.t td	kd
d  t t|d	kdd  d}dd|dkrd}nd}t jjkdd  t jdkfdd d}d}d	 t ||kd t ||kdd  t  d	k fdd t |d d  d kfdd t |d d  d kfdd  dkr|||d |d g}	n||d |d g}	t j|	jj	dt j|	t j
j	dfS )Nr  c                      s   d j  S )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r_  r)   self_r)   r*   rH   R  rI   z,meta_fractional_max_pool2d.<locals>.<lambda>rs   r   z^fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimenions, but got r  z emptyr   c                   S   s   dS )NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr)   r)   r)   r)   r*   rH   `  rI   c                   S   s   dS )NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr)   r)   r)   r)   r*   rH   e  rI   ru  r-  rq   r  r   c                   S   s   dS )Nz6Expect _random_samples to have the same dtype as inputr)   r)   r)   r)   r*   rH   s  rI   c                      s   d j  S )Nz1Expect _random samples to have 3 dimensions got, r_  r)   )random_samplesr)   r*   rH   w  rI   z=Expect _random_samples.size(0) no less then input batch size.c                   S   s   dS )Nz<Expect _random_samples.size(1) equals to input channel size.r)   r)   r)   r)   r*   rH     rI   c                      s   d  dS )Nz/Expect _random_samples.size(2) equals to 2 got .r)   r)   )r   r)   r*   rH     rI   c                      s   dd  d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r)   r)   )input_heightr6  r)   r*   rH     rI   c                      s   dd  d  S )Nz$fractional_max_pool2d: kernel width r   z& is too large relative to input width r)   r)   )input_widthr6  r)   r*   rH     rI   rD   r\   )r=   rK   ry   r   rv   r   rD   r_   rd   r\   r   )
rb  r6  r  rc  ry   Zinput_channelsZinput_batchr   crv   r)   )r   re  rf  r6  rc  rb  r*   meta_fractional_max_pool2dN  s    










ri  c           	         s  t d tjtjkfdd ttdkfdd \}}tjdkfdd tjjkfdd t	d	jD ]$ t
 d
k fdd q }jdkr|
d
}||||f}n&|
d
}|
d	}|||||f}|S )NZmax_unpooling2d_forward_outc                      s   d j  S )Nz2elements in indices should be type int64 but got: rg   r)   r  r)   r*   rH     rI   z#meta_max_unpool2d.<locals>.<lambda>r   c                      s   dt   dS )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r  r)   r  r)   r*   rH     s    r  c                      s   d j  dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r_  r)   ra  r)   r*   rH     s    c                      s   dj  d j  S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: rz  r)   )r   rb  r)   r*   rH     s    r   r   c                      s   dj  d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got r   being empty.rz  r)   )r   rb  r)   r*   rH     s    rs   )r8   r   r=   rK   rD   r   r   ry   rn   r   rv   r   rm   )	rb  r   r  r  r  ro   	nchannelsr   r  r)   )r   r   r  rb  r*   meta_max_unpool2d  s>    






	



rp  c                    s  t jt jkdd  t jdkfdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ]&t dk fdd qt d dkod
 dkod dkfdd d S )Nc                   S   s   dS )Nz(elements in indices should be type int64r)   r)   r)   r)   r*   rH     rI   z._max_unpooling3d_shape_check.<locals>.<lambda>r  c                      s   d j  dS )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with rl  r_  r)   rX  r)   r*   rH     rI   rs   c                      s   dt   dS )NzVThere should be exactly three elements (depth, height, width) in output_size, but got rj  r  r)   rk  r)   r*   rH     s    c                      s   dt   dS )NzRThere should be exactly three elements (depth, height, width) in stride, but got: rj  r  r)   r
  r)   r*   rH     rI   c                      s   dt   dS )NzSThere should be exactly three elements (depth, height, width) in padding, but got: rj  r  r)   )r  r)   r*   rH     rI   c                      s   dj  d j  S rm  rz  r)   )r   r   r)   r*   rH     s    r   r   c                      s     dj  d dS )NzI: Expected input to have non-zero size for non-batch dimensions, but got r  rn  rz  r)   rY  r)   r*   rH     s    r   c                      s
   d  S )Nz5strides should be greater than zero, but got stride: r)   r)   r
  r)   r*   rH     rI   )	r=   rK   rD   r   ry   r   rn   r   rv   )r   r   r  r   r  rE  r)   )rE  r   r   r   r  r  r   r*   _max_unpooling3d_shape_check  sB    
 







	"
rq  c                 C   s   t d t| ||||d |  }|\}}}| jdkrV|d}	||	|||f}
n(|d}|d}	|||	|||f}
|
S )NZmax_unpooling3d_forward_outzmax_unpooling3d()r  r   r   )r8   r   rq  r   ry   rv   rm   )rb  r   r  r   r  ro   Zodepthr  r  ro  r   r  r)   r)   r*   meta_max_unpool3d  s$    
     




rr  c                 C   s  t t|dkdd  |d }t|dkr0|n|d }t|dkrH|n|d }t | pdt|dkdd  |sv|n|d }	|s|nt|dkr|	n|d }
|s|nt|dkr|	n|d }t t|dkdd  |d }t|dkr|n|d }t|dkr|n|d }t t|dkd	d  |d }t|dkrB|n|d }t|dkr\|n|d }t | jd
kdd  | jdkr| dnd}| d}| d}| d}| d}t||||	||}t||||
||}t||||||}t| |||||	|
|||||||||||||d | jdko<t| t j	k}| jdkr|| 
d}|  ol|jt j	d}||||f}n|||||f}| |}| j|t jd}|r|jt j	d}|jt j	d}||fS )Nr  c                   S   s   dS NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   /  rI   z.meta_max_pool3d_with_indices.<locals>.<lambda>r   r   r   c                   S   s   dS NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   7  rI   c                   S   s   dS NzImax_pool3d: padding must either be a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   ?  rI   c                   S   s   dS NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr)   r)   r)   r)   r*   rH   G  rI   r  c                   S   s   dS r  r)   r)   r)   r)   r*   rH   O  rI   r  r^  ru  r-  rq   zmax_pool3d_with_indices()r  r|   rg   )r=   rK   r   ry   rv   rv  r  r8   r:  rF  r  r  rm   r   r   )r   r6  r   r  r/  ra  r  rz  r{  r  r|  r}  rU  rV  rW  rX  rS  rT  r  r  r  r  r  r  r  r  r;  input_channels_last_checkrz   r   r   r)   r)   r*   meta_max_pool3d_with_indices#  s    

  







ry  c                 C   sd  t t|dkdd  |d }t|dkr0|n|d }	t|dkrH|n|d }
t | pdt|dkdd  |sv|n|d }|s|	nt|dkr|n|d }|s|
nt|dkr|n|d }t t|dkdd  |d }t|dkr|n|d }t|dkr|n|d }t t|dkd	d  |d }t|dkrB|n|d }t|dkr\|n|d }t |jd
kdd  |d}|d}|d}|d}| d}| d}| d}t|| ||||	|
|||||||||||||||d |jdkot|t jk}|jdkr@|	d}|
  o>|j
t jd}||j}|r`|jt jd}|S )Nr  c                   S   s   dS rs  r)   r)   r)   r)   r*   rH     rI   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>r   r   r   c                   S   s   dS rt  r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS ru  r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS rv  r)   r)   r)   r)   r*   rH     rI   r  c                   S   s   dS r  r)   r)   r)   r)   r*   rH     rI   r^  ru  r-  rq   z"max_pool3d_with_indices_backward()r  r  r|   )r=   rK   r   ry   rv   r]  r8   r:  rF  r  r  rm   rn   r   )r  r   r6  r   r  r/  ra  r   r  rz  r{  r  r|  r}  rU  rV  rW  rX  rS  rT  r  r  r  r  r  r  r  r;  rx  r  r)   r)   r*   %meta_max_pool3d_with_indices_backward  s    
  









rz  r   gridc                    s   t j jk fdd t jt jko8 jt jk fdd t jd  jd k fdd t  jd jd k fdd tdjD ]$t j dkfd	d qd S )
Nc                      s   dj  d j  S )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r  r)   r|  r   r)   r*   rH     s    z+check_grid_sampler_common.<locals>.<lambda>c                      s   dj  d j  S )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )r[   r)   r}  r)   r*   rH     s    r   c                      s   dj  d j  S )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes rz  r)   r}  r)   r*   rH     s    rq   r   c                      s   dj d  d j S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )ry   rn   r)   r}  r)   r*   rH     s    c                      s   dj  d  dS )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  rz  r)   r  r)   r*   rH     s    )r=   rK   r\   r[   r  rn   ry   r   r{  r)   )r|  r   r   r*   check_grid_sampler_common  s*    
r  c                   @   s   e Zd ZdZdZdZdS )GridSamplerInterpolationr   r   r   N)rV   
__module____qualname__ZBILINEARZNEARESTBICUBICr)   r)   r)   r*   r    s   r  r   r|  interpolation_modec                    sP   t jdkoj jk fdd t jdko@|tjjk dd  d S )Nr  c                      s   dj  d j  S )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r~  rz  r)   r}  r)   r*   rH   $  s    z'check_grid_sampler_3d.<locals>.<lambda>c                   S   s   dS )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr)   r)   r)   r)   r*   rH   /  rI   )r=   rK   ry   r  r  r=  r  r)   r}  r*   check_grid_sampler_3d!  s    

r  c           
      C   s:   |d }|rt j|t jd}nd }t j|t jd}	||	fS Nr   r|   )r=   r  r   r   
r  r   r|  r  padding_modealign_cornersr  Zinput_requires_gradr  	grad_gridr)   r)   r*   grid_sampler_2d_backward_meta3  s    
r  c           
      C   s\   t | | t| || | jd }| jd }|jd }|jd }|jd }	| |||||	fS )Nr   r   r   rs   )r  r  rn   rm   )
r   r|  r  r  r  r$  CZout_DZout_HZout_Wr)   r)   r*   grid_sampler_3dF  s    	





r  r  c           
      C   sP   t || t||| |d }|r4tj|tjd}nd }tj|tjd}	||	fS r  )r  r  r=   r  rc  r   r  r)   r)   r*   grid_sampler_3d_backwardY  s    
 r  c                 O   s4   | dd }|st|}||d< tj| f||S )NrD   )rC   r8   Z	get_dtyper=   rd   )rv   r  r:   r+  rD   r)   r)   r*   fullq  s
    
r  c                 C   s   |t jkrt |d kdd  t jd|d kr2| jn|||d krD| jn||d}| jrp||  | 	 | 
  n||  |  d |d |S tjj| |||||d}|d |S )Nc                   S   s   dS )Nz9memory format option is only supported by strided tensorsr)   r)   r)   r)   r*   rH     rI   zzeros_like.<locals>.<lambda>r   r   Tr  )r=   Z
sparse_coorK   rd   rD   r\   	is_sparseZsparse_resize_and_clear_rv   
sparse_dim	dense_dimr_   Z_coalesced_r"   r   r   fill_)ro   rD   r[   r\   r]   r}   r  r)   r)   r*   r  {  s>    	
  
	
r  c                    s     }t|dkdd   dkr( n |   }t |kpN|k  fdd dkrnn| t }t } |    }| = | = |||S )Nr   c                   S   s   dS )Nz-select() cannot be applied to a 0-dim tensor.r)   r)   r)   r)   r*   rH     rI   zmeta_select.<locals>.<lambda>c                      s   d d   d  S )Nzselect(): index z! out of range for tensor of size z at dimension ru   r)   r_   ri   ro   r)   r*   rH     rI   )r_   r=   rk   rv   r   r   r   r   )ro   r_   ri   ry   rv   new_sizer   Znew_storage_offsetr)   r  r*   meta_select  s$    
r  c                 C   s
   t | S r$   r8   Zclone_preserve_strides)ro   r   r_   ri   r)   r)   r*   meta_select_scatter  s    r  c                 C   s
   t | S r$   r  )ro   r   r_   rX   rW   stepr)   r)   r*   meta_slice_scatter  s    r  )r_   dim_post_exprwrap_scalarc                 C   sb   |dkr|st d}| }|d }| |k s2| |krNt d|  d| d| d| dk r^| |7 } | S )Nr   r   zdim z out of bounds (rS   rT   )rb   )r_   r  r  r  r  r)   r)   r*   r     s    ,r   c                 C   s   |   dkrdS | j| S r	  rf  )r9  r_   r)   r)   r*   ensure_nonempty_size  s    r  c                    st   t  d}t  d}t||kdd  t|D ]6 kr8tttk fdd q8d S )Nr   c                   S   s   dS )NzDIndex tensor must have the same number of dimensions as input tensorr)   r)   r)   r)   r*   rH     rI   z$gather_shape_check.<locals>.<lambda>c                      s$   d dj  dj  d   S )Nz!Size does not match at dimension z expected index  to be smaller than self  apart from dimension rz  r)   r_   r   ri   ro   r)   r*   rH     s   )r  r_   r=   rK   r   r  )ro   r_   ri   	self_dimsZ
index_dimsr)   r  r*   gather_shape_check  s    r  c                    sb   ddl m} t||  }|  dk}|sVt jtjk fdd t	| |  | 
 jS )Nr   guard_size_obliviousc                      s   d j  S )Nz2gather(): Expected dtype int64 for index, but got rg   r)   rh   r)   r*   rH     rI   zmeta_gather.<locals>.<lambda>)r   r  r   r_   rl   r=   rK   rD   rj   r  rm   rn   )ro   r_   ri   Zsparse_gradr  wrapped_dimZis_index_emptyr)   rh   r*   meta_gather  s    

r  c                 C   s   |rT| dkrdS | dkrdS | dkr(dS | dkr4dS | d	kr@d
S t ddd  d S | dkr`dS | dkrldS t ddd  d S d S )NsumZ
REDUCE_ADDr  ZREDUCE_MULTIPLYZmeanZREDUCE_MEANZamaxZREDUCE_MAXIMUMZaminZREDUCE_MINIMUMFc                   S   s   dS )Nz=reduce argument must be either sum, prod, mean, amax or amin.r)   r)   r)   r)   r*   rH     rI   z#get_operator_enum.<locals>.<lambda>addmultiplyc                   S   s   dS )Nz/reduce argument must be either add or multiply.r)   r)   r)   r)   r*   rH     rI   r  )reduce_use_new_optionsr)   r)   r*   get_operator_enum  s,    r  c                    s`   ddl m} || dkr8t|jtjk fdd |d k	r\t|j|jk fdd d S )Nr   r  c                      s
     dS )Nz"(): Expected dtype int64 for indexr)   r)   method_namer)   r*   rH   "  rI   z,scatter_gather_dtype_check.<locals>.<lambda>c                      s
     dS )Nz0(): Expected self.dtype to be equal to src.dtyper)   r)   r  r)   r*   rH   (  rI   )r   r  rl   r=   rK   rD   rj   )r  ro   ri   src_optr  r)   r  r*   scatter_gather_dtype_check  s    



r  c                 C   s
   t | dS r   )r  rr   r)   r)   r*   ensure_nonempty_dim,  s    r  c           	         s0  ddl m} | dkr d S tt t kdd  d}t }t|D ].}t|}| krtq\|t|kr\d} qq\|sƈd k	rt|D ]$}t|}|t|krd} qqd k	rtt t kdd  t|  fdd nt|  fd	d d S )
Nr   r  c                   S   s   dS NzCIndex tensor must have the same number of dimensions as self tensorr)   r)   r)   r)   r*   rH   8  rI   z%scatter_shape_check.<locals>.<lambda>FTc                   S   s   dS r  r)   r)   r)   r)   r*   rH   R  rI   c                      s&   dj  dj  d  dj   S )NExpected index r  r  z and to be smaller than src rz  r)   r_   ri   ro   r  r)   r*   rH   V  s   c                      s   dj  dj  d   S )Nr  r  r  rz  r)   r  r)   r*   rH   \  s   )	r   r  rl   r=   rK   r  r_   r   r  )	ro   r_   ri   r  r  Zis_wrong_shaper  r   Zindex_d_sizer)   r  r*   scatter_shape_check1  sF    


r  c                 C   s@   t ||  }td| || t| ||| |d k	r<t|| d S )Nscatter)r   r_   r  r  r  )ro   r_   ri   r   r  r  r  r)   r)   r*   scatter_meta_implb  s
    r  c                 C   s   t | |||d | | jS Nr  r  rm   rn   ro   r_   ri   r   r)   r)   r*   meta_scatter_addk  s    r  c                 C   s   t | |||d | S r  r  r  r)   r)   r*   meta_scatter_add_q  s    r  c                 C   s0   t |tjr|nd }t| |||| | | jS r$   )rN   r=   r
   r  rm   rn   ro   r_   ri   Zsrc_or_valuer   r   r)   r)   r*   meta_scatterw  s    
r  c                 C   s(   t |tjr|nd }t| |||| | S r$   )rN   r=   r
   r  r  r)   r)   r*   meta_scatter_  s    	r  )r  queryr   r=  r   	logsumexp	cum_seq_q	cum_seq_kmax_qmax_k	dropout_p	is_causalphilox_seedphilox_offsetr   c                 C   sX   t |dddd}t |dddd}t |dddd}|||fS r  )r=   r   r[  )r  r  r   r=  r   r  r  r  r  r  r  r  r  r  r   grad_qgrad_kgrad_vr)   r)   r*   'meta__scaled_dot_product_flash_backward  s    r          )r  r   r=  r  r  	attn_maskr   c                 C   sv   |  d}|  d}|  d}	|  d}
tj||	||
f| j| jddd}tj||	|ftj| jddd}||fS )Nr   r   r   rs   rg  )rv   r=   rd   rD   r\   r[  r@   )r  r   r=  r  r  r  r   r   	num_headsZmax_seqlen_batch_qhead_dimZ	attentionr  r)   r)   r*   0meta__scaled_dot_product_flash_attention_for_cpu  s4    




  
r  )
r  r  r   r=  r   r  r  r  r  r   c
                 C   s   | d}
| d}| d}| d}| d}tj|
|||fd|j|jd}tj|
|||fd|j|jd}tj|
|||fd|j|jd}|||fS )Nr   r   rs   r   r   r   r   rs   rg  )rv   r=   empty_permutedrD   r\   )r  r  r   r=  r   r  r  r  r  r   r   r  r  len_qZlen_kr  r  r  r)   r)   r*   9meta__scaled_dot_product_flash_attention_for_cpu_backward  s0    







r  )r  r  r   r=  	attn_biasr   r  r  r  r  grad_input_maskr  r   c                 C   s  | d}| d}| d}| d}| d}| d}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}d }|d k	r|
d r| d}|d dkr|n|d |d  }t|  }||d< tj||j|jd}|d	d |f }||||fS )
Nr   r   r   rs   r  rg  rq   r   .)rv   r=   r  rD   r\   r   rd   )r  r  r   r=  r  r   r  r  r  r  r  r  r   r   r  r  r  Z
head_dim_vr  r  r  r  	grad_biaslastDimlastDimAligned	new_sizesr)   r)   r*   +meta__scaled_dot_product_efficient_backward  sJ    









   r  )r  r  r   r=  r   r  r  r  r  r  r  r  r  r  r   window_size_leftwindow_size_rightc                 C   s(   t |}t |}t |}|||fS r$   r  )r  r  r   r=  r   r  r  r  r  r  r  r  r  r  r   r  r  
grad_querygrad_key
grad_valuer)   r)   r*   meta__flash_attention_backwardB  s    


r  )r  r  r   r=  r   cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kr  r  r  r  custom_mask_typebias_requires_gradr   num_splits_keyshared_storage_dqdkdvc                 C   sF  |rt |jd |jd kdd  t |jd |jd kdd  t j|jdd d|jd |jd f|j|jd	}|d
d}|d
d}|d
d}nt |}t |}t |}|d k	r*|d}|d dkr|n|d |d  }t	| }||d< t j||j|jd	}|dd |f }nt jd|jd}||||fS )Nr   c                   S   s   dS )Nz,seqlen must match for `shared_storage_dqdkdvr)   r)   r)   r)   r*   rH   }  rI   z4meta__efficient_attention_backward.<locals>.<lambda>rs   c                   S   s   dS )Nz3embedding dim must match for `shared_storage_dqdkdvr)   r)   r)   r)   r*   rH     rI   r   r-  rq   rg  ru  r   r   .r)   r  )
r=   rK   rn   rd   rD   r\   r  r   rv   r   )r  r  r   r=  r   r  r  r  r  r  r  r  r  r  r  r   r  r  chunkr  r  r  r  r  r  r  r)   r)   r*   "meta__efficient_attention_backwarda  s:    "




 r  )ro   r   r   r   scale_ascale_bscale_resultuse_fast_accumc                    s8  dd }dd }	dd }
t  dko2  dk fdd	 t | d
d	  t |	 j  dd	  t dd dkfdd	 t  dd dko dd dk fdd	 t |
jo|
 j fdd	 |d k	r|nj}t jd d|jdt jdt j	jdfS )Nc                 S   s   | d | d ko| d dkS r	  r)   r
  r)   r)   r*   is_row_major  s    z$meta_scaled_mm.<locals>.is_row_majorc                 S   s   |d dko|d | d kS r	  r)   )rn   r   r)   r)   r*   is_col_major  s    z$meta_scaled_mm.<locals>.is_col_majorc                 S   s   | t jt jt jt jfkS r$   )r=   Zfloat8_e4m3fnZfloat8_e5m2Zfloat8_e4m3fnuzZfloat8_e5m2fnuzrg   r)   r)   r*   is_fp8_type  s    z#meta_scaled_mm.<locals>.is_fp8_typer   c                      s   d   d    S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=rr   r)   r   ro   r)   r*   rH     rI   z meta_scaled_mm.<locals>.<lambda>c                   S   s   dS )Nzself must be row_majorr)   r)   r)   r)   r*   rH     rI   c                   S   s   dS )Nzmat2 must be col_majorr)   r)   r)   r)   r*   rH     rI   r   r   r   c                      s   d  d S )NzBExpected self.size(0) to be divisible by 16, but got self.size(1)=r   ru   r)   r   r)   r*   rH     rI   c                      s   d j  S )Nz>Expected both dimensions of mat2 to be divisble by 16 but got rz  r)   )r   r)   r*   rH     rI   c                      s   dj  d j  S )Nz8Expected both inputs to be fp8 types but got self.dtype=z and mat2.dtype=rg   r)   r  r)   r*   rH     rI   rg  r)   )
r=   rK   r_   r   rn   rv   rD   rd   r\   r   )ro   r   r   r   r  r  r  r  r  r  r  Z
_out_dtyper)   r  r*   meta_scaled_mm  sH    

"
   r  c                 C   s    t | ||||dd | | jS NT)r  r  ro   r_   ri   r   r   r   r)   r)   r*   meta_scatter_reduce_two  s    r  c                 C   s   t | ||||dd | S r  r  r  r)   r)   r*   meta_scatter_reduce__two  s    r  c                   sh   t d    k odkn   fdd   dkrLt j|t j jdS t j d|t j jdS )Nr   r   c                      s   d    S )Nz@The probabilty distributions dimensions must be 1 or 2, but got rr   r)   rX  r)   r*   rH     rI   z"meta_multinomial.<locals>.<lambda>r   rg  )r=   rK   r_   rd   rj   r\   rv   )r   num_samplesreplacementr   r)   rX  r*   meta_multinomial  s    
   r  c                 C   s   d}| D ]}||9 }q|S r   r)   )vsr  vr)   r)   r*   multiply_integers  s    
r  c                    s   t tkfdd d  t t k fdd t tdd dd  D ortdd D fdd d d \}}||fS )Nc                      s   d  dt  S )Nz%It is expected output_size equals to , but got size r  r)   )num_spatial_dimsr  r)   r*   rH     rI   z'upsample_common_check.<locals>.<lambda>r   c                      s   d  dt  S )Nz$It is expected input_size equals to r  r  r)   )expected_input_dimsr  r)   r*   rH     rI   c                 s   s   | ]}|d kV  qdS r   Nr)   )r3   r  r)   r)   r*   rQ     s     z(upsample_common_check.<locals>.<genexpr>c                      s   d  d S )NzDInput and output sizes should be greater than 0, but got input size z and output size r)   r)   )r  r  r)   r*   rH     rI   )r=   rK   r   r  )r  r  r  r  Zchannelsr)   )r  r  r  r  r*   upsample_common_check  s    

*r  c                    sZ   t   dkp"t  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      s   d    S )Nz>Non-empty 3D data tensor expected but got a tensor with sizes ru   r)   rX  r)   r*   rH     rI   z$upsample_nearest1d.<locals>.<lambda>r  r|   
r=   rK   rl   r  rv   r  rm   r   r8   r:  )r   r  scalesfull_output_sizer)   rX  r*   upsample_nearest1d  s    
  
r  c           	         s   t   dkp"t  dd   fdd t  |dd} |}t } j	\}}}} j
jdkr~|dk r~t j}|j|d	}|S )
Nr   r   c                      s   d    S Nz>Non-empty 4D data tensor expected but got a tensor with sizes ru   r)   rX  r)   r*   rH   !  rI   z$upsample_nearest2d.<locals>.<lambda>r   r  r  r  r|   )r=   rK   rl   r  rv   r  rm   r8   r:  rn   r\   rU   r   r   )	r   r  scales_hscales_wr  r   r}   r;   Z
n_channelsr)   rX  r*   upsample_nearest2d  s     
  

r  r  r  r  r  r  c                    st   t ||dd tjdkfdd tdD ]*t  k fdd q0|jt	dS )Nr   r  r  c                      s   d j  S )NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r_  r)   r  r)   r*   rH   G  rI   z-upsample_nearest2d_backward.<locals>.<lambda>c                
      s&   d d   d d  S )NzCExpected grad_output to have the same shape as output; output.size(z) = z but got grad_output.size(ru   r)   r  r  r   r)   r*   rH   L  s    r|   )
r  r=   rK   ry   r   rv   rm   r   r8   r:  r  r)   r  r*   upsample_nearest2d_backward5  s"      
	
r  c                    sZ   t   dkp"t  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      s   d    S )Nz>Non-empty 5D data tensor expected but got a tensor with sizes ru   r)   rX  r)   r*   rH   ^  rI   z$upsample_nearest3d.<locals>.<lambda>rs   r  r|   r	  )r   r  Zscales_dr  r  r  r)   rX  r*   upsample_nearest3dX  s    
  
r  c           
      C   s   t | t j| t jd }}|d k	r|d k	rt|ts:tt|tsHt|j}| }	t||}t||}|	||	 |	||	 t
||d t
||d ||fS ||fS )Nrg   )r  r  )r=   r   r   rN   r   rb   rn   r   r   r   r   )
ro   stabler_   
descendingr   r   r  r   rz   Z
out_strider)   r)   r*   	meta_sorth  s    	

r  )r_   r  c                C   s   t | |||dd S )N)r  r_   r  r   )r  )ro   r  r_   r  r)   r)   r*   meta_argsort  s    r  c                    s  t jdkfdd t jjkfdd dd k	rt jdkfdd t  kfdd t jjkfdd t jdkfd	d d
   t   k fdd t tfddfD dd  d S )Nr   c                      s    j  dS Nz != 2r_  r)   input_gatesr)   r*   rH     rI   z%rnn_cell_checkSizes.<locals>.<lambda>c                      s   j  d j  S N != rz  r)   )hidden_gatesr  r)   r*   rH     rI   r   c                      s    j  dS )Nz != 1r_  r)   )
input_biasr)   r*   rH     rI   c                      s      d  S r  r  r)   )
gates_sizer  r)   r*   rH     rI   c                      s   j  d j  S r  rz  r)   )hidden_biasr  r)   r*   rH     rI   c                      s    j  dS r  r_  r)   )prev_hiddenr)   r*   rH     rI   r   c                
      s,      dd d d d  d
S )Nr  r   z * z // z (aka rT   )rl   rv   r)   )expected_prev_hidden_numelfactorr   r  r"  r)   r*   rH     rI   c                 3   s   | ]}|j  j kV  qd S r$   r  r2   r  r)   r*   rQ     s   z&rnn_cell_checkSizes.<locals>.<genexpr>c                   S   s   dS )Nz%expected all inputs to be same devicer)   r)   r)   r)   r*   rH     rI   )r=   rK   ry   rn   rv   rl   r  )r  r  r  r!  r$  r"  r)   )r#  r$  r   r!  r  r  r  r"  r*   rnn_cell_checkSizes  s8    





r%  c                 C   sL   t | |||d| tj| tjd}tj|tjd}tj|tjd}|||fS )Nr  r|   )r%  r=   r   r   )r  r  cxr  r!  	workspacehycyr)   r)   r*   _thnn_fused_lstm_cell_meta  s
    r*  c                 C   s*  t |dk}|r,t |}|d }| jd }n4|
r:| jd n| jd }|
rR| jd n| jd }d}|rhdnd}|dkrx|n|}|r||| g}n |
r|||| gn|||| g}| |}|	| ||g}|d krtjd| jd}n
||}||	| ||g}|rdnd}| j|tjd}|||||fS )Nr   r   rq   r   r  rg   )r   rn   rm   r=   rd   r\   r@  )r   r   Zweight_stride0Z
weight_bufhxr&  r  hidden_sizeZ	proj_size
num_layersbatch_firstZdropouttrainbidirectionalbatch_sizesZdropout_stateZis_input_packed
seq_length
mini_batchZbatch_sizes_sumZnum_directionsZout_sizerz   r   Z
cell_shaper)  r(  Zreserve_shapeZreserver)   r)   r*   
_cudnn_rnn  s2    

r4  c                 C   s   |r| j d n| j d }|r&| j d n| j d }|
}|rB|||gn|||g}| |}|d krptjd| jd}n||j }|d krtjd| jd}n||j }tjd| jtjd}||||fS )Nr   r   r  r   )rn   rm   r=   rd   r\   r@  )r   Zw0Zw1Zw2Zw3hx_Zcx_r   r1  r  r,  r-  
has_biasesr0  r.  r/  r2  r3  Zoutput_chanelsrz   r   r(  r)  r'  r)   r)   r*   mkldnn_rnn_layer  s     
r7  c                    sR   | j dkr.t dkp dk fdd n t|  dk fdd d S )Nr   rq   c                      s    d  S )Nz4: Expected reduction dim -1 or 0 for scalar but got r)   r)   r_   rE  r)   r*   rH     rI   z'zero_numel_check_dims.<locals>.<lambda>c                      s    d  dS )Nz: Expected reduction dim z to have non-zero size.r)   r)   r8  r)   r*   rH     rI   )ry   r=   rk   rv   )ro   r_   rE  r)   r8  r*   zero_numel_check_dims  s    
r9  c                    sD   |d k	r$t || }t||  nt| dk fdd d S )Nr   c                      s
     dS )Nz@: Expected reduction dim to be specified for input.numel() == 0.r)   r)   rl  r)   r*   rH   *  rI   z%check_argmax_argmin.<locals>.<lambda>)r   r_   r9  r=   rK   rl   )r2  ro   r_   r)   rl  r*   check_argmax_argmin#  s    

r:  c                 C   sD   t d| | t| j|d k	r"|fnd }t| ||}| j|tjdS )Nargmaxrg   )r:  r8   r  rn   r  rm   r=   r   )ro   r_   r  r+  rn   r)   r)   r*   argmax_argmin_meta.  s    r<  c                 C   s   t jd||||dS )Nr)   r   r   )r  rD   r[   r\   r]   r)   r)   r*   scalar_tensor6  s        r=  c                 C   s   t ||  dd}t|dko:||  dkr6| |ndkdd  |  dkrTdn| |}t|dkop||kdd  t| j}t|dkr|||< | || j|tj	dfS )	NT)r  r   r   c                   S   s   dS )Nzselected index k out of ranger)   r)   r)   r)   r*   rH   C  rI   ztopk_meta.<locals>.<lambda>c                   S   s   dS )Nzk not in range for dimensionr)   r)   r)   r)   r*   rH   F  rI   rg   )
r   r_   r=   rK   rv   r   rn   r   rm   r   )ro   r   r_   largestsortedZ	sliceSizeZtopKSizer)   r)   r*   	topk_meta=  s    $
r@  c                 C   s   | d k	r| n|}t | dkdd  | }| d k	rPt |  |kdd  |d k	rpt | |kdd  t | |kdd  t | |kdd  t | dkdd  t | |d	 |d
  d kdd  d S )Nr   c                   S   s   dS N r)   r)   r)   r)   r*   rH   T  rI   z(checkLSTMBackwardSizes.<locals>.<lambda>c                   S   s   dS rA  r)   r)   r)   r)   r*   rH   W  rI   c                   S   s   dS rA  r)   r)   r)   r)   r*   rH   Y  rI   c                   S   s   dS rA  r)   r)   r)   r)   r*   rH   Z  rI   c                   S   s   dS rA  r)   r)   r)   r)   r*   rH   [  rI   c                   S   s   dS rA  r)   r)   r)   r)   r*   rH   \  rI   r   r   r  c                   S   s   dS rA  r)   r)   r)   r)   r*   rH   ]  rI   )r=   rK   r_   rv   rl   )grad_hygrad_cyr&  r)  r'  Zdefined_gradZexp_sizer)   r)   r*   checkLSTMBackwardSizesR  s    rE  c           	      C   s`   | d kr|d krdS t | |||| tj|td}tj|td}|rR|jdddnd }|||fS )N)NNNr|   r   F)r  )rE  r=   r   legacy_contiguous_memory_formatr  )	rC  rD  r&  r)  r'  Zhas_biasZ
grad_gatesZgrad_cxr  r)   r)   r*   #_thnn_fused_lstm_cell_backward_impla  s     rG  c                 C   sf   d }d }d }|d r"| |  }|d s2|d r\| |d| df}| |d}|||fS )Nr   r   r   rq   r  )r  r  r  r  r  Zgrad_weightr  r)   r)   r*   linear_backwardo  s    rH  c                    s   t jdkr$jd ||  dks:tdj d| dd   fdd	}jd ||  }jd
 | }jd | }jd d |||f}|}|j| d}|S )Nr   ru  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 S   s   t j| t jkS r$   r8  r<  r)   r)   r*   r>    s    z,meta_pixel_shuffle.<locals>.is_channels_lastc                      sN    r"t dkrtjS tjS n(jtjdr6tjS jtjdrJtjS d S r?  )r  r=   r   r;  r  r@  r)   r>  ro   r)   r*   rB    s    z.meta_pixel_shuffle.<locals>.pick_memory_formatr-  rq   r|   )r   rn   rb   rm   r   )ro   Zupscale_factorrB  r  ZHrZWrrz   r   r)   rI  r*   meta_pixel_shuffle|  s    
rJ  c                 C   sZ   |  | j}| |j}| |j}| |j}| |j}| |j}|||||||fS r$   rL  )r   Zweight0Zweight1Zweight2Zweight3r5  Zcx_tmpr   Zhy_Zcy_Zgrad_output_r_optZgrad_hy_r_optZgrad_cy_r_optr   r  r,  r-  r6  r/  r0  r1  r.  r'  Zdiff_xZdiff_hxZdiff_cxZdiff_w1Zdiff_w2Zdiff_br)   r)   r*   mkldnn_rnn_layer_backward  s    rK  )	out_int32r   c                C   s   t j| |rt jnt jd S r  )r=   r   r   r   r   )ro   Z
boundariesrL  r   r)   r)   r*   meta_bucketize  s     rM  d   c                    s   dt dkr(t fdd tt t fdd t dk fdd tttfdd tttfd	d tkd
d  tj jj	dS )Nzhistc()r  c                      s   d j  dS )Nz%"histogram_cpu" not implemented for ''rg   r)   rX  r)   r*   rH     rI   zmeta_histc.<locals>.<lambda>c                      s    dt   S )Nz#: argument 'bins' must be int, not r  r)   binsrE  r)   r*   rH     rI   r   c                      s    d  S )Nz: bins must be > 0, but got r)   r)   rP  r)   r*   rH     rI   c                      s     dt  S )Nz%: argument 'min' must be Number, not r  r)   )rE  r  r)   r*   rH     rI   c                      s     dt  S )Nz%: argument 'max' must be Number, not r  r)   )rE  r  r)   r*   rH     rI   c                   S   s   dS )Nz&{fn_name}: max must be larger than minr)   r)   r)   r)   r*   rH     rI   r   )
r  r=   rK   r   rN   r   r   rd   r\   rD   )r   rQ  r  r  r)   )rQ  rE  r   r  r  r*   
meta_histc  s*    
rR  c                    sd   t   |dd}t  dkp>tdd   dd  D  fdd  |jt	 d	S )
Nr   r  r   c                 s   s   | ]}|d kV  qdS r  r)   )r3   rv   r)   r)   r*   rQ     s     z,meta_upsample_bimode2d_aa.<locals>.<genexpr>r   c                      s   d    S r  ru   r)   rX  r)   r*   rH     rI   z+meta_upsample_bimode2d_aa.<locals>.<lambda>r|   )
r  rv   r=   rK   rl   r  rm   r   r8   r:  )r   r  r  r  r  r  r)   rX  r*   meta_upsample_bimode2d_aa  s      (

rS  c                 C   s\   t | dkdd  t | dkdd  t |jjdd  t |jjdd  d S )Nr   c                   S   s   dS )Nz%found_inf must be a 1-element tensor.r)   r)   r)   r)   r*   rH     rI   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>c                   S   s   dS )Nz%inv_scale must be a 1-element tensor.r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS )Nz!found_inf must be a float tensor.r)   r)   r)   r)   r*   rH     rI   c                   S   s   dS )Nz!inv_scale must be a float tensor.r)   r)   r)   r)   r*   rH      rI   )r=   rK   rl   rD   r   )ro   r  Z	inv_scaler)   r)   r*   *_amp_foreach_non_finite_check_and_unscale_  s     
 
 rT  c                 C   s   t |  }| |S r$   )r   rv   rm   )ro   nanZposinfZneginfr   r)   r)   r*   
nan_to_num  s    rV  c                 C   s   | j tjtjtjtjhks,td| j  d| j}t||}t||}||krR| S t	| 
 }t	|  }|| ||  ||< ||< || ||  ||< ||< | || | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)r[   r=   r  Z
sparse_cscr  Z
sparse_bscrb   ry   r   r   rv   r   r   )ro   Zdim0r  ndimsrv   r   r)   r)   r*   r    s&    

r  c                 C   sx   | j }| jrD|  }|  }|dkr,|dks`td| d| dn|  dks`td| dt| d|dk rrdndS )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  r   )ry   r  r  r  rb   r_   r  )ro   rW  r  r  r)   r)   r*   t_'  s    

rX  )rL  r   sidesorterc                C   sD   |r
t jnt j}t|t jr.t j||d S t jd|| jdS d S )Nrg   r)   rg  )	r=   r   r   rN   r
   r   r   rd   r\   )Zsorted_sequencero   rL  r   rY  rZ  rD   r)   r)   r*   meta_searchsorted9  s    r[  c                    s,   t  t jt jt jt jfk fdd d S )Nc                      s
   d  S )Nz/Unsupported input type encountered for isin(): r)   r)   rg   r)   r*   rH   H  rI   z3_check_for_unsupported_isin_dtype.<locals>.<lambda>)r=   rK   r  r   Z
complex128Z	complex64rg   r)   rg   r*   !_check_for_unsupported_isin_dtypeE  s    
r\  c
                    sf   t  jt jt jt jt jfk fdd td\}
}}||krNt |d k	  | 	df}|S )Nc                      s   d j  S )Nz$Unsupported input type encountered: rg   r)   r  r)   r*   rH   [  rI   z3meta_embedding_bag_dense_backward.<locals>.<lambda>rs   r   )
r=   rK   rD   r   r   r   Zfloat64r   rm   rv   )r  r   r  r  Zmaximum_indicesZnum_weightsr  r  r  r  r  r  r  Zindex_grad_weightr)   r]  r*   !meta_embedding_bag_dense_backwardL  s    
r^  c                 C   s   t d\}}}	| d}
t||kd t|  dk t| dk |d}t| dk t|d|
k | |f}|S )Nrs   r   zHembedding_bag_backward: per_sample_weights only supported for mode='sum'r   r   )r   rv   r=   rK   r_   rm   )r  r   r   r   r  r  r  r  r  r  Zembedding_featuresr  r   r)   r)   r*   .meta_embedding_bag_per_sample_weights_backwardd  s    

r_  )assume_uniqueinvertc                C   sx   t t| tpt|tdd  t| ts:t j| |jd} t|tsTt j|| jd}t| j t|j t j| t j	dS )Nc                   S   s   dS )Nz<At least one of elements and test_elements must be a Tensor.r)   r)   r)   r)   r*   rH   |  rI   zmeta_isin.<locals>.<lambda>r  rg   )
r=   rK   rN   r
   r   r\   r\  rD   r   r  )elementsZtest_elementsr`  ra  r)   r)   r*   	meta_isinw  s    



rc  )r   ro   r   c                 C   s4   t | dkdd  t|tjd\}}t j||dS )Nr   c                   S   s   dS )Nz,polygamma(n, x) does not support negative n.r)   r)   r)   r)   r*   rH     rI   z meta_polygamma.<locals>.<lambda>r  rg   )r=   rK   r   r   r  r   )r   ro   r;   r6   r)   r)   r*   meta_polygamma  s    
rd  c                 C   s6   |   ^ }}}}tj||||f| j| j| jdS )N)rD   r[   r\   )rv   r=   rd   rD   r[   r\   )r   r1  Zleading_dimsr  ro  rp  r)   r)   r*   meta_channel_shuffle  s    re  r   c                 C   s   t dd S )Nz.Tensor.item() cannot be called on meta tensors)r   r   r)   r)   r*   meta_local_scalar_dense  s    rf  c                 C   s   t | t dd }|S )Nc                 S   s   t | tjdS r*  r<   r   r  r   r)   r)   r*   _f  s     z)_create_unary_float_meta_func.<locals>._fr.   r   funcrh  r)   r)   r*   _create_unary_float_meta_func  s    rl  c                 C   s   t | t dd }|S )Nc                 S   s   t | |tjdS r*  rg  )r4   rJ  r)   r)   r*   rh    s
      z*_create_binary_float_meta_func.<locals>._fri  rj  r)   r)   r*   _create_binary_float_meta_func  s    rm  c                  C   s8  i } dD ]*}t | }|D ]}|| kr|| | |< qq|  D ]\}}t|tjjrTq<t|tsbt|tj	j
j| tj	| dr|t d krt| dq<|jrq<| dkrq<d| krt|| q<d| krt|| q<d| kr
t|| q<d	| kr&t|| q<t|| q<d S )
N)rZ   Zpost_autogradZpre_autogradZCompositeImplicitAutogradrZ   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::as_strided_scatteraten::empty_stridedaten::_to_copyaten::constant_pad_ndaten::rot90zmkldnn::zmkl::zonednn::zquantized::)r   itemsrN   r=   Z_opsZHigherOrderOperatorr   rb   Zpy_impl_CZDispatchKeyr#   Z%_dispatch_has_kernel_for_dispatch_keyr2  r   Zis_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)Zactivate_meta_tablerU   registryZopoZop_overloadr(   r)   r)   r*   activate_meta  sH     	 r~  )F)NNN)N)NNNF)F)F)NN)NNN)NN)T)r1  )r   )rL  T)F)F)F)FF)F)TT)r  )FTN)TFF)TF)r   )r  N)r  N)FF)N)r)   r   r\  F)r)   r   FTN)r)   r   FTN)N)Fr   FNFrq   )T)NF)rq   F)r   )F)F)N)F)r)   r   r\  F)r)   r   r\  F)NNNNN)r   NNr   )T)F)F)N)N)NNF)N)N)N)r  FNN)NN)FN)NNN)NNF)NNNNNF)T)T)F)N)NN)NN)NNN)Nrq   FNN)NN)NF)NNNN)rq   TT)rN  r   r   )NN)NNN)rq   )rq   (3  r   enumr   typingr   r   r   r   r   r=   Ztorch._prims_commonr9  r8   r   r	   r
   Ztorch._decompr   r   r   r   Z
torch._opsr   Ztorch._primsr   r   r   r   r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   r   r  r   r    Ztorch.utilsr!   r,   Zopsr"   libraryLibraryr|  r.   r<   rE   rL   ZlinspaceZlogspacer  rf   Ztaker   r   rp   r{   r   ZcummaxZcumminr   r   r   Z_fft_c2cr   Z_fft_r2cr   ZrandpermZgenerator_outr   rj   r   randintr   r   Zlow_outr   Zrandr   Z_fft_c2rr   r  r   r   Z
unsqueeze_r   Z_sparse_semi_structured_linearr-  rD   r   Z_sparse_semi_structured_mmr   Z_sparse_semi_structured_addmmr   Z_cslt_sparse_mmr  r   Zindex_reducerw  r   Zindex_reduce_r   Zindex_selectr   Zsegment_reducer   r  Z	unary_outr  r_   r	  r  r
  r  r  r  r  Z_assert_asyncr  msgr  Z_printr  Z_make_dep_tokenr  r"  Z_functional_sym_constrain_ranger&  r(  Z(_functional_sym_constrain_range_for_sizer)  Z_functional_assert_asyncr*  r~   r7  r   r?  rC  rH  rN  Z_linalg_eighrO  rT  Z_linalg_eigvalsZlinalg_eigvalsrW  Z
linalg_eigrY  r\  r]  r`  rb  re  rh  rp  Zlinalg_inv_exrr  Zlinalg_ldl_factor_exrx  Zlinalg_ldl_solver  Z	linalg_lur  Zlinalg_lu_factor_exr  Zlinalg_lu_solver  Z	lu_unpackr  r  Z	linalg_qrr  r  r  Z_linalg_svdr  r}  ra  r  r  Zlinalg_solve_triangularr  r  r  Z_linalg_detr  r  r  r  Zreflection_pad1dr  Zreplication_pad1dr  r  Zreflection_pad1d_backwardr  Zreplication_pad1d_backwardr  r  Zreflection_pad2dr  Zreplication_pad2dr  Zreflection_pad2d_backwardr  Zreplication_pad2d_backwardr  r   Zreflection_pad3dr  Zreplication_pad3dr  Zreflection_pad3d_backwardZreplication_pad3d_backwardr  Z_pdist_forwardr@   r  Z_pdist_backwardr
  Zbaddbmmr  Z	bernoullir  Z
bernoulli_r  r  r  Z_fused_moving_avg_obs_fq_helperr"  mmr(  r  r  r7  r>  ZconvolutionrD  rv  Z_has_mkldnnrw  rE  Z_convolution_pointwiserJ  Z_linear_pointwiserM  Zhas_mklry  rN  Z_mkl_linearrO  rz  rP  Zqconv2d_pointwiserY  Zqlinear_pointwiser   rZ  r{  r[  Z
max_pool2dre  ri  Z
avg_pool2dr  r  Zavg_pool2d_backwardr  Z
avg_pool3dr  Zavg_pool3d_backwardr  Z_adaptive_avg_pool2dr  Z_adaptive_avg_pool3dr  Z_adaptive_avg_pool2d_backwardr  Z_adaptive_avg_pool3d_backwardr  r  Zadaptive_max_pool2dr  r  r  Zadaptive_max_pool3dr  r  r  Zrepeat_interleaver  rO   r  r  ri   Z_unsafe_indexr  Zconvolution_backwardr  Zaddbmmr  Z_fused_adam_r  Z_fused_adamr  Z_int_mmr  Z_convert_weight_to_int4packr  Z_weight_int4pack_mmr  Z_weight_int8pack_mmr  Z_cdist_forwardr  Z_cdist_backwardr  Z_embedding_bagr  Z_embedding_bag_forward_onlyr  r  Znansumr  ZmedianZ	nanmedianr  Z
dim_valuesr  r   r!  Zlogical_not_r"  repeatr$  Zzero_r%  Zmul_ZScalarZdiv_Zlogical_and_Zlogical_or_Zlogical_xor_r(  Zadd_Zsub_r)  roundZdecimalsr,  r/  
__rshift__r2  
__lshift__r4  zeror5  r  r7  fillr8  Zrelu_r9  Z	index_putZ_unsafe_index_putr<  Zmasked_fill_r>  Z_masked_scaler?  Zmasked_scatter_rA  Zmasked_scatterrB  Zmasked_scatter_backwardrC  Z
index_put_rD  aliasrF  rH  ZbmmrI  rL  rO  rv  rw  r  r]  r  r_  Z max_pool2d_with_indices_backwardr_  Zmax_pool2d_with_indicesr`  Zfractional_max_pool2dri  Zmax_unpool2drp  rq  Zmax_unpool3drr  Zmax_pool3d_with_indicesry  Z max_pool3d_with_indices_backwardrz  r  r  r  Zgrid_sampler_2d_backwardr  r  r  r  r  r  r  Zselect_scatterr  Zslice_scatterr  r   r  r  Zgatherr  r  r  r  r  r  Zscatter_addr  Zscatter_add_r  r  r   r=  r   Zvalue_reducer  Zscatter_r  Z,_scaled_dot_product_flash_attention_backwardr  Z+_scaled_dot_product_flash_attention_for_cpur  Z4_scaled_dot_product_flash_attention_for_cpu_backwardr  Z0_scaled_dot_product_efficient_attention_backwardr  Z_flash_attention_backwardr  Z_efficient_attention_backwardZSymIntr  Z
_scaled_mmr  Zscatter_reducetwoZtwo_outr  Zscatter_reduce_r  Zmultinomialr  r  r  r  Z_upsample_nearest_exact1dr  Z_upsample_nearest_exact2dr  Z"_upsample_nearest_exact2d_backwardr  Z_upsample_nearest_exact3dr   r  Zvalues_stabler  Zargsortr  r%  Z_thnn_fused_lstm_cellr*  r4  r7  r9  r:  r;  Zargminr<  r=  Ztopkr@  r   rF  rE  rG  rH  Zpixel_shufflerJ  rK  Z	bucketizeZ
Tensor_outrM  ZhistcrR  Z_upsample_bilinear2d_aaZ_upsample_bicubic2d_aarS  rT  rV  r  rX  Zsearchsortedr[  r\  Z_embedding_bag_dense_backwardr^  Z*_embedding_bag_per_sample_weights_backwardr_  isinrc  Z	polygammard  Zchannel_shufflere  Z_local_scalar_denserf  rl  rm  Zspecial_airy_aiZspecial_bessel_y0Zspecial_bessel_y1Zspecial_modified_bessel_i0Zspecial_modified_bessel_i1Zspecial_modified_bessel_k0Zspecial_modified_bessel_k1Z!special_scaled_modified_bessel_k0Z!special_scaled_modified_bessel_k1Zspecial_chebyshev_polynomial_tZspecial_chebyshev_polynomial_uZspecial_chebyshev_polynomial_vZspecial_chebyshev_polynomial_wZ&special_shifted_chebyshev_polynomial_tZ&special_shifted_chebyshev_polynomial_uZ&special_shifted_chebyshev_polynomial_vZ&special_shifted_chebyshev_polynomial_wZspecial_hermite_polynomial_hZspecial_hermite_polynomial_heZspecial_laguerre_polynomial_lZspecial_legendre_polynomial_pZtorch._refs.nn.functionalZtorch._refs.specialr~  r)   r)   r)   r*   <module>   s  (	8	6

      
	

  !"   $



#
	
	











	

	



)      


2

   &
&
   7
 (
 
  &   
;  
/

 Z
&5 ?'  



 e
,
  
    "         M,H
    T
N


.


*
$c$#
     h	










!
T]?6G+      !T
7/


   g
e(

	    ,1	








	   " * 7    7     6
 


"  
7'
"  
   




	


C