U
    zh                    @   s.  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlm  mZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/ d d	l0m1Z1m2Z2m3Z3m4Z4 d
dl5m6Z6 ddl7m8Z8m9Z9m:Z:m;Z; ddl<m=Z=m>Z> ddl:m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZI ddlmJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZR ddlSmTZTmUZU eVeWZXi ZYeejZj[ede
f f e\d< i Z]eejZj[ede
f f e\d< e^ Z_eejZj[ e\d< ejTj`Z`ejTjaZaejTjbZbe^ ZceejZj[ e\d< e^ ZdeejZj[ e\d< e^ ZeeejZj[ e\d< ef ZgeejZj[ejZj[f e\d< ejTjhZhdd Zidd Zjdd Zkeje`jle`jme`jne`joe`jpe`jqe`jre`jse`jte`jue`jve`jwg ejxejyejzej{ej|ej}ej~ejejejejejejd Zed!d"d#Zd$d% Zd&d' Ze'd(d)d*Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3e'jd3fd4d5Zd6d7 ZdQd8d9ZdRd:d;ZdSd<d=ZdTeGejd>d?d@Zeebjdd(eGejd>dAdBZd3dCeGejd>dDdEZee`jjdd(eGejd>dFdGZd3dCeGejdHdIdJZeebjdd(eGejdHdKdLZddMe'jd3ddd3d3df	dNdOZdPdQ Ze  dUdRdSZee`jd3ddTdUdV Zee`jd3ddTdWdX Zee`je`je`je`jebjgdYdZ Zee`d[r
ee`je ee`jdd(dVd\d]Zee`jdd(dWd^d_Zee`jgdXd`daZee`jdbdc Zee`jddde Zee`jdfdg Zee`jdhdi Zee`jjdjdk Zee`jdldm Zee`jdd(dndo Zeebjdd(dpdq Zee`jdd(drds Zee`jdtdu Zee`jdd(ee`jdd(ee`jdd(dvdw Zee`jdd(dxdy Zee`jdd(dYd{d|Zee`jldd(dZd}d~Zlee`jdd(d[ddZee`jdd(d\ddZd]ddZeehjdd(eGeGeGeeeejeGdddZeehjdd(eGeGeGeeeejeGdddZeehjjdd(eGeeeeejeGdddZeehjjdd(eGeeeeejeGdddZeehjjdd(eGeGeGeeejeGdddZeehjjdd(eGeGeGeeejeGdddZee`j΃d^ddZee`jdd(d_eeedddZee`jdd(d`eeedddZee`jdd(daeeedddZee`jdd(dd Zee`jdd(dbddZee`jdd(dcddZee`jdd(ddddZee`jdd(dd Zee`jdd(dd Zee`jdd(dd ZؐdeddZee`jڃdfddZڐdgddZe ddd ZݐdhejjdddZdiejjdddZdjejjdddZdkddZdd ZeejTjjdd(dd Zee`jdd(dd Zee`jdd(ddĄ Zee`jjdd(ddƄ Zee`jddȄ Ze dddʄ Zdd̄ Zee`jjZee`jjZee`jjZee`jjZee`j ee`jdd΄ Zee`jddЄ Zee9jdd(dd҄ Zee9jdd(ejdӜddՄZee9jdd(ddׄ Zee9jdd(ddل Z ee9jdd(d dڜee eGeedۜdd݄Zee9jdd(d dڜeeee eGedޜddZee`jdd(d3d3deGeGeedddZdd Zdd Zdd Zd3dddZ	dhZ
dd Zee`j ee`j ee`j ee`j ee`jd3d ee`jjd3d ee`j ee`jd3d ee`j ee`j ee`j ee`j ee`j ee`j ee`jj ee`jj ee`j ee`jj ee`jj ee`j j ee`j! ee`j"d3d ee`j#d3d ee`jqe	 ee`j$e ee`j%e ee`j&e ee`j'e ee`j( ee`j) ee`j) ee`j* ee`j+ ee`j, ee`j- ee`j.e ee`j/ ee`j0 ee`j1 ee`j2 ee`j3 ee`j4 ee`j5 ee`j6 ee`j7e ee`j8 ee`j9e ee`j: ee`j; ee`j;j< ee`j= ee`j> ee`j? ee`j@ ee`jA ee`jB ee`jC ee`jD ee`jE ee`jF ee`jG ee`jH ee`jI ee`jJ ee`jK ee`jL ee`jM ee`jN ee`jO ee`jP ee`jQ ee`jR ee`jS ee`jT ee`jU ee`jVjW ee`jX ee`jY ee`jZ ee`j[ ee`j\ ee`j] ee`j^ ee`j_j ee`j`jd3d ee`jae eejbjcjd eejbjcje ee`jf ee`jg ee`jhe ee`ji ee`jj ee`jk ee`jl ee`jm ee`jnjo ee`jpjed3d ee`jqjed3d ee`jrjed3d ee`jsjed3d ee`jtjed3d ee`jujed3d ee`jvje ee`jwje ee`jxje ee`jyje ee`jzje	 ee`j{ ee`j|dd(dlddZ|ee`j}ddddZ}dd Z~ee`dree`je} eebjdd Zee`jdd(eedddZee`jdd(dmdd Zdd Zeeje`jgdddd3dddZeejdnddZeejdd	 Zee`jd
d Zee`jdd Zdd Zee`jdd(dd Zdd Zeeje`jgdddddddddZdd Zdd Zee`jeeZeedZeed Zdd Zee`jdddddddZee`jdddddd d!Zee`jdddddd"d#Zeebjjd$d% Zeeje`jgd&d' Zee`jdd(dod(d)Zee`jdd(dpd*d+Zd,d- Zd.d/ Zd0d1 Zee`jdd(d2d3 Zee`jdd(d4d5 Zee`jdqd6d7Zee`jdrd8d9Zd:d; Zd<d= Zee`jdd(dsd>d?Zee9jdd(dtd@dAZdBdC Zee9jdd3dDdEdF Zee`jdd(dudGdHZee`jdd(edIdJdKZddMdLejZj[eee edMdNdOZee`jdd(ddPeee dQdRdSZee`jdd(edIdTdUZee`jdd(edIdVdWZee`jdd(edIdXdYZee`jdd(dMdZeed[d\d]Zdveee df eed^d_d`Zee`jjdwee dadbdcZee`jjdxee dadddeZee`jujdyee ee dfdgdhZuee`jvjdzee ee dfdidjZvee`jjd{ee ee ee dkdldmZee`jjd|ee ee ee dkdndoZdpdq Zee`jee`jee`jdrds Zeebjjdtdu Zee`jdd(d}dvdwZejeejef dxdydzZejejd{d|d}Zejejejd~ddZd~ddZdd Zdd ZddddZeebjdd(dddZeebjdd(dd Zee`jr ee`jsjd3dZee`jsdd(dd ZsdddZǐdd Zee`jjd3dZee`jɃdd Zɐdd Zee`jjd3dZee`j̃dd Zee`jjd3dZϐdd Zee`j΃dd Zee`jjdddZee`jmjd3dZee`jjd3dZee`jmdd(dddZmee`jdd(dddZӐdd Zee`jnjd3dZee`jndd(dddZndd Zאdd ZؐdedddZِdd Zee`jۃddd!ddZېdd Zܐdd Zݐdd Zސdd Zee`jebjgddd3dddZee`jddd3dddÄZdĐdń ZedƐdǄ Zee`jjd3dZee`jjd3dZee`jjd3dZee`jdMdȍdɐdʄ Zddːd̄Zee`jd͐d΄ Zee`jdd(ddϐdЄZedѐd҄ ZedӐdԄ Zee`jdMdȍddՐdքZee`jgdMdȍdאd؄ Ze:jee:j dٜdڐdۄZeebjgdMdȍdܐd݄ Zee`je`jjgdMe'jdTdސd߄ Zee`jebjgdMdȍdd Zee`jdd Zee`jebjgddd!ddZee`jjZ ee`jjZee`jjZee`jjZee`jjZee`jdddZee`jdddZee`jdd Zee`jdd(dddZee`jdd(dddZee`j	ddd!ddZ	ee`j
dddZee`jdd(dddZee`jdd(dddZeebjeِd ee`jeِdZee`jeِdZee`jeِdej|dZee`jeِdej|dZee`jdMddZdd dZdd Zee`jZee`jZee`jZee`jZee`j Z ee`j!Z!ee`j"Z"ee`j#dMdZ#ee`j$ ee`j% ee`j&Z&ee`j'Z'ee`j(Z(ee`j)ddZ)ee`j*Z*ee`j+Z+ee`j,Z,ee`j- ee`j.Z.ee`j/e'jd(e. ee`j0 ee`j1 ee`j2 ee`jX ee`j3ddMejdZ3ee`j4ddMejdZ4ee`j5ddMejdZ5ee`j6ddMejdZ6ee`j7Z7ee`j8Z8ee`j9e7 ee`j:e8 ee`j;Z;ee`j&Z&ee`j<Z<ee`j= ee`j>ddZ>ee`j ee`j?ejd ee`j@e; ee`jAejd ee`jBejd ee`jCejd ee`jDejdZDee`jEejd ee`jFejd ee`jG ee`jH ee`jI ee`jJ ee`jK ee`jL ee`jM ee`jN ee`jO ee`jP ee`jQ ee`jR ee`jS ee`jT ee`jU ee`jV dd	lWmXZX d
d ZYeXD ]ZZeYe`eZD ],\Z[Z\Z]ee[eZe\e]d %qZeYebeZD ],\Z[Z\Z]ee[eZe\e]d %q%qFee`j^jedMdZ_ee`j^jedMdZ`ee`j^jedMd ee`jajeZbee`jajeZcee`jdje# ee`jdje# ee`jeje; ee`jfje& ee`jgje ee`jgjhe ee`jijeZjee`jijeZkee`jle! ee`jmje7 ee`jmje7 ee`jnje8 ee`jnje8 ee`joje7 ee`joje7 ee`jpje8 ee`jpje8 ee`jqe< ee`jre> ee`jse| dd Ztete`juje`j^je_ ete`juje`j^je` ete`jvje`jajeb ete`jvje`jajec ete`jwje`jijej ete`jwje`jijek dd Zxexe`jye exe`jze' exe`j{e( exe`j|e) exe`j}e* exe`j~e+ exe`je, exe`je exe`jje exe`jje exe`je3 exe`je4 exe`je5 exe`je6 exe`je# exe`je exe`je  ee`je' ee`je( ee`je* ee`je+ ee`je, exe`je`j exe`je`j exe`je`j exe`je`j exe`je`j ee`jdddZee`jjdd Zee`jjdd Zee`jdd Ze. D ]"\ZZee/ee *qee`jdd ZeejTjjdd ZeejTjjdd ZeejTj`jjdd  ZeejTj`jddd!d"Zd d#lmZ ee ee d$d% Zeed&d' ZeejTjjd(d) ZeejTjjd*d+ Zeedd(e:jed,d-d.ZeejTjbjjd/d0 ZeejTjjd1d2 Zzd dlZejTjZeejd3d4 Zeejd5d6 Zeejd7d8 Zeejd9d: Zeejd;d< Zeejd=d> Zeejd?d@ ZeejdAdB ZeejdCdD ZeejădEdF ZeejƃdGdH ZeejȃdIdJ ZeejTjʐj˃dKdL ZW n, e͐efk
.rd   eXϐdM Y nX ddNl7mАZ e6eЃ ddOl7mѐZ eѐҡ  eѐӡ  ddPl7mԐZ eԐա  dS (      N)defaultdict)AnyCallableDictListOptionalSetTupleUnion)patch)associative_scan_op) triton_kernel_wrapper_functionaltriton_kernel_wrapper_mutation)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)CeilDivFloorDiv
IntTrueDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)
ExpandViewIndexingConstant	is_tritonops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)	ceildivdecode_device
is_dynamicis_gpuis_pointwise_use,needs_fallback_due_to_atomic_add_limitationspad_listlikesympy_productuse_scatter_fallback)opsV.	loweringslayout_constraints	fallbacksneeds_realized_inputsforeach_opsinplace_foreach_opsinplaceable_foreach_opsc                 C   s   | st d| d S )Nzinductor does not support NotImplementedError)condmsg rJ   J/var/www/html/venv/lib/python3.8/site-packages/torch/_inductor/lowering.py
assert_nyiS   s    rL   c                    sV   t  tttfrdd  D S t  t  tjjrRt	 fdd 
 D  d S )Nc                 S   s   g | ]}t |qS rJ   )add_needs_realized_inputs.0xrJ   rJ   rK   
<listcomp>Z   s     z-add_needs_realized_inputs.<locals>.<listcomp>c                 3   s   | ]}t  |V  qd S N)getattr)rO   overloadfnrJ   rK   	<genexpr>]   s    z,add_needs_realized_inputs.<locals>.<genexpr>)
isinstancelisttuplesetrB   addtorch_opsOpOverloadPacketupdate	overloadsrU   rJ   rU   rK   rM   X   s    
rM   c                 C   s8   t | tjjr,|  D ]}|tt| |< qn|t| < d S rR   )rX   r]   r^   r_   ra   r@   rS   )rV   
constraintrT   rJ   rJ   rK   add_layout_constraintb   s    rc   )r   r"   r                      	   
         dtypec                 C   s2   t | ts| S | tks&td|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)rX   intDTYPE_ID_LOOKUPAssertionErrorrn   rJ   rJ   rK   decode_dtype   s
    
rs   c                 C   sF   t | tr"t|  p t|  S t | tjr8| jdkS t | tS d S )NT)	rX   r1   r   	get_dtyper   sympyExpr
is_integerrp   rP   rJ   rJ   rK   is_integer_type   s
    

ry   c                 C   s$   t | trt|  S t | tS d S rR   )rX   r1   r   rt   boolrx   rJ   rJ   rK   is_boolean_type   s    
r{   type_promotion_kindc                    s0   dd   fdd|D }t |d| i\}}|S )Nc                 S   sJ   t | ttjfr| S t| ds"tt|  }tj	dg| | 
 dS d S )Nrt   r"   rn   )rX   r   ru   rv   hasattrrr   lenget_sizer]   zerosrt   )inpdimrJ   rJ   rK   construct_input   s
    z+get_promoted_dtype.<locals>.construct_inputc                    s   g | ]} |qS rJ   rJ   rO   argr   rJ   rK   rQ      s     z&get_promoted_dtype.<locals>.<listcomp>r}   )r   )r}   argsZinps_ro   rJ   r   rK   get_promoted_dtype   s    	r   c                 C   sh   t | ttfs| g} nt| } t| D ]<}t |tjjr&| D ] }t||}|tkr@| 	| q@q&| S rR   )
rX   rY   rZ   r]   r^   r_   ra   rS   r?   append)aten_fnrV   rT   Zother_fnrJ   rJ   rK   get_overloads   s    
r   c                    s   dd t  D |s|rhrh|r*tjndd  D }t|d|i fddfdd D  |rrtt fddD  D ]\}}| |< qtt D ]8}t | t	j
rt | t d	    |< q S )
Nc                 S   s   g | ]\}}t |tr|qS rJ   rX   r1   rO   irP   rJ   rJ   rK   rQ      s     
 z"transform_args.<locals>.<listcomp>c                 S   s0   g | ](}t |ttjfs(t|d ddk	r|qS )ro   N)rX   r   ru   rv   rS   rO   arJ   rJ   rK   rQ      s   r}   c                    sD   t | trt| S t | tjr<t| j d   S | S d S Nr   )rX   r1   to_dtyper%   Constantvalue
get_device)r   )r   ro   indicesrJ   rK   promote   s
    

ztransform_args.<locals>.promotec                    s   g | ]} |qS rJ   rJ   r   )r   rJ   rK   rQ      s     c                    s   g | ]} | qS rJ   rJ   rO   r   r   rJ   rK   rQ      s     r   )	enumerater]   rz   r   zipbroadcast_tensorsranger   rX   r%   r   r)   createrY   r   )r   	broadcastr}   convert_input_to_boolZpromoting_argsr   rP   rJ   )r   ro   r   r   rK   transform_args   s*    $
&r   c                    s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s&   t | dkst | |}t| |S )Nr    )r   rr   r2   )r   kwargsout	decomp_fnrJ   rK   wrapped   s    
z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   rC   r`   r?   dictfromkeys)r   r   r   Zaten_fnsrJ   r   rK   _register_foreach_lowering   s    
r   c                    s<   t  fdd}t  tt | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s   t | } d}t| dkr6t| d t tfr6d}| d } tdd | D rTtdtdd | D rtd	d  D stt	| } |r| g} | |}t
| |S )
NFr"   r   Tc                 s   s   | ]}|d kV  qdS )r   NrJ   rN   rJ   rJ   rK   rW   !  s    z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>zout= ops aren't yet supportedc                 s   s   | ]}t |tV  qd S rR   r   rN   rJ   rJ   rK   rW   %  s     c                 s   s   | ]}|t kV  qd S rR   )rA   )rO   rV   rJ   rJ   rK   rW   %  s    )rY   r   rX   rZ   anykeysrr   valuesallr   r2   )r   r   unpackedr   r   r   r   r   r}   rJ   rK   r     s0    
   
z#_register_lowering.<locals>.wrapped)r   r   r   r?   r`   r   r   )r   r   r   r}   r   r   rJ   r   rK   _register_lowering	  s
    r   Fc                 C   s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   r}   r   )r   partialr   )r   r   r}   r   rJ   rJ   rK   register_lowering;  s    	r   c                 C   s   g }t jt| t|tddD ]v\}}|dkr>|| q"|dkrR|| q"tjj	|| t
t|jt
t|jk r|| q"|| q"tt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    r"   )	fillvalue)	itertoolszip_longestreversedru   Integerr   r>   graphsizevarsguard_equalsr   expandZfree_symbolsrZ   )r   boutputrP   yrJ   rJ   rK   broadcast_symbolic_shapesM  s       r   c                    s*  |d ks|d kst d|d kr.|d kr.tj}tdd | D sD| S tdd | D r|pft| d|ifdd  fdd	| D S td
d | D }g }| D ]}t|tt	fr|
tt|| | t|  qt|tjr|
tt|| | t|  q|
| q|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s   s    | ]}t |tjttfV  qd S rR   )rX   ru   rv   rp   floatrN   rJ   rJ   rK   rW   m  s     z$promote_constants.<locals>.<genexpr>c                 s   s    | ]}t |tttjfV  qd S rR   )rX   rp   r   ru   rv   rN   rJ   rJ   rK   rW   o  s     r}   c                    s4   t | tjrt|  td S t|  td S d S rR   )rX   ru   rv   r%   r*   r5   r   rx   rn   rJ   rK   
const_funct  s    z%promote_constants.<locals>.const_funcc                    s   g | ]} |qS rJ   rJ   rN   )r   rJ   rK   rQ   z  s     z%promote_constants.<locals>.<listcomp>c                 s   s$   | ]}t |tttjfr|V  qd S rR   )rX   r1   r)   r%   r   rN   rJ   rJ   rK   rW   {  s      )rr   r   DEFAULTr   r   r   nextrX   rp   r   r   r)   r   r%   r   rt   r   rY   r   ru   rv   r*   )inputsoverride_return_dtyper}   exr   rP   rJ   )r   ro   rK   promote_constantse  sJ     

r   c              	      s,   d dt t d fdd}|S )Nalphar   c              	      sv  
d k	r&t tt|r&rt
| S t|	}r`| d k	rl| dkrlt|}t|d | |d< n| d ksltdd |D |d  	p|d   t	|d 
 jdk|dd  D ]B}t|tjstt| kstd d d|  q fd	d
}sZd }|D ]$}t|
 jr"|
 } qHq"|sZ|d 
 }pb|}tj| |dS )Nr"   c                 S   s   g | ]}|  qS rJ   make_loaderrN   rJ   rJ   rK   rQ     s     z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   cudazndim mismatch  c                    s   t  t ks$td  d tjkrLd k	rL fddD  S rtrttjkrt fddD  S  fddD  S d S )Nzwrong ndim r   c                    s   g | ]}| qS rJ   rJ   rO   loadindexrJ   rK   rQ     s     zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                    s   g | ]}| qS rJ   rJ   r   r   rJ   rK   rQ     s     c                    s   g | ]}| qS rJ   rJ   r   r   rJ   rK   rQ     s     )r   rr   r]   rz   float64r   )ro   rV   is_cudaloadersoverride_fn_when_cuda_float64override_fn_when_input_boolrangesr   rK   inner_fn  s    $z/make_pointwise.<locals>.inner.<locals>.inner_fndevicero   r   r   )r   mapr+   rr   r   rY   mulr   rt   r5   r   typerX   r%   BaseConstantr   r7   r.   r   )r   r   otherr   r   r   allow_alpharV   override_devicer   r   r   triton_fallback)ro   r   r   r   rK   inner  sF    
	

zmake_pointwise.<locals>.innerr   r1   )rV   r   r   r   r   r   r   r   rJ   r   rK   make_pointwise  s    	(1r   c                    s&   ddt t t  d fdd}|S )Nr"   r   r   c                    s  dd }t tjjjdkp&tjjjtk}tjjjD ](}|jD ]}|jdkrT|jtks<d}q<q2d }|D ]}t	|t
tfrd|} qqd|d k	stdg }|D ]2}t	|t
tfs||gt |  q|| q|t| }	d gt | }
|	 D ]|\\}}}g }|D ]V\}} r |d| i}n| }||
|< t|jr|r|r||  q|rtj| qtdd	 |
D st|
S )
Nc                 S   st   t t}t| D ]^\}}t|  }d }|D ]}t|tr*|j } qHq*|d k	sXtd|||f 	||f q|S )Nz.foreach op should have at least one tensor arg)
r   rY   r   r6   rX   r1   datar   rr   r   )Z	arg_pairsr   r   r   use_foreachr   trJ   rJ   rK   
group_args  s    


z9make_foreach_pointwise.<locals>.inner.<locals>.group_argsr   Zcall_functionTz1at least one input must be a list to a foreach opr   c                 s   s   | ]}|d k	V  qd S rR   rJ   rN   rJ   rJ   rK   rW     s     z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   r>   r   current_nodeuserstargetrD   oprC   rX   rY   rZ   rr   r   r   itemsr7   r   realizeZregister_listr   )r   r   r   Zrealize_outputsnodeuserZa_list_inputinputZbroadcast_inputsgroupsoutputsr   r   groupZbuffer_listZ
output_indr   r   r   pw_fnrJ   rK   r     sP    
z%make_foreach_pointwise.<locals>.innerr   )r   r   r   rJ   r   rK   make_foreach_pointwise  s    "Er   rP   ro   c                    s>   |    kr |rt| S | S  fdd}t| d| S )Nc                    s   t j|  dS )N)	src_dtype)r=   r   rx   ro   r  rJ   rK   	_to_dtype  s    zto_dtype.<locals>._to_dtyper   )rt   cloner   )rP   ro   copyr  rJ   r  rK   r     s
    r   c                 C   sZ   |j s|  j rL|  r6t| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nrn   Fadd_to_fallback_setTr  )
is_complexrt   r   
empty_liker%   ZInplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )rP   ro   dstrJ   rJ   rK   _convert_element_type"  s      r  r
  c                   sp   |    kr |rt| S | S dd }|}| }||krRttjj|  S  fdd}t| d| S )Nc                 S   s"   | j rt| jS t| jS d S rR   )is_floating_pointr]   finfobitsiinforn   rJ   rJ   rK   _get_primitive_bitwidth7  s    z1to_dtype_bitcast.<locals>._get_primitive_bitwidthc                    s   t |  S rR   )r=   to_dtype_bitcastrx   ro   Zx_dtyperJ   rK   _to_dtype_bitcastC  s    z+to_dtype_bitcast.<locals>._to_dtype_bitcastr  )rt   r  r  atenviewro   r   )rP   ro   r  r  Zsrc_bitsZdst_bitsr  rJ   r  rK   r  2  s    r  c                 C   s<   |j s|  j r.ttjtjjj	j
| |S t| |ddS NTr
  )r  rt   r1   r   r%   ZComplexViewr]   r=   r  r  ro   r  r  rJ   rJ   rK   _view_dtypeN  s
    r  rP   r   c                C   s8   t |}|  |kr$|r t| S | S ttj| |S rR   )r5   r   r  r1   r   r%   Z
DeviceCopy)rP   r   r  rJ   rJ   rK   	to_deviceW  s    r   c                 C   s   t | |ddS r  )r   r  rJ   rJ   rK   _device_put^  s    r!  Tc
                 C   s   |p| j }t|}
|r"td| }|dk	r2t|}t|
|||rB|nd||	d}
t| |||d|
}
tt|rttt|d|d|
 |
S )z3A pointwise function that maps ops.{name} to inputsZ
libdevice_N)r   r   r   r   r   r   )r}   r   )__name__r,   r   r   r~   r  rS   )r   namer   r}   r   r   r   r   use_libdevice_for_f64r   rV   Zfn_libdevicerJ   rJ   rK   register_pointwisec  s>    


r%  c                     sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| rttt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                     s    | |d S r   rJ   r   r   r&  rJ   rK   frexp0  s    zregister_frexp.<locals>.frexp0c                     s    | |d S Nr"   rJ   r'  r(  rJ   rK   frexp1  s    zregister_frexp.<locals>.frexp1r  c                     s    d | | d | |fS Nr   r"   rJ   r'  )pw_fnsrJ   rK   rV     s    zregister_frexp.<locals>.fnNr|   )
r,   r   r]   int32r   r  r&  r~   r  rS   )r#  r)  r+  rV   rJ   )r&  r-  rK   register_frexp  s*    
r/  c                 C   s   t ||d}t| |}|S )Nr   )r   r   )r   Zpointwise_lowering_fnr   rV   rJ   rJ   rK   register_foreach_pointwise  s    
r1  )r   r}   c                    s  dd }t |ttfr"t||}t |ttfr<t||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qt
t D ]8}t  | tjrt | t |d	    |< qt||d
 d	 t d |t d |S )Nc                  W   s
   t j|  S rR   )r=   wherer   rJ   rJ   rK   rV     s    zwhere.<locals>.fnr"   r    r|   c                 S   s   g | ]\}}t |tr|qS rJ   r   r   rJ   rJ   rK   rQ     s     
 zwhere.<locals>.<listcomp>c                    s   g | ]} | qS rJ   rJ   r   r   rJ   rK   rQ     s     r   r  )rX   r   rp   constant_liker   r   r   r   r   r   r   r   r%   r   r)   r   rY   r   r   r   )rH   r   r   rV   ro   r   r   rP   rJ   r   rK   r2    s,    
  $
&
  r2  c                  G   s   t | dkr*t| d ttfr*t| d  S ttdd | D g }g }| D ]H}| }t |t |ks~t	dd t
||D rt||}|| qJ|S )Nr"   r   c                 S   s   g | ]}|  qS rJ   )r   rN   rJ   rJ   rK   rQ     s     z%broadcast_tensors.<locals>.<listcomp>c                 s   s2   | ]*\}}|d kr|d kp(|d ko(|d kV  qdS r"   NrJ   rO   r   r   rJ   rJ   rK   rW     s    z$broadcast_tensors.<locals>.<genexpr>)r   rX   rY   rZ   r   r   reducer   r   r   r   r   r   )r   r   r   rP   sizesrJ   rJ   rK   r     s       
r   c                 C   s   | S rR   rJ   rx   rJ   rJ   rK   nop  s    r8  
lift_freshc                 C   s   t | tst|d kr&tt| jS tt|  |}t	t |t
sJ|fn|}g }t|  D ]0\}}||krtjjt|ds`|| q`||  krt| |S | S r*  )rX   r1   rr   r0   r   r   r   r   r   r[   rZ   r   r>   r   r   evaluate_exprru   Eqr   r  )rP   r   dims	new_shapedsrJ   rJ   rK   squeeze  s    r@  c                 C   s   t t| |S rR   )r  r@  )rP   r   rJ   rJ   rK   squeeze_copy  s    rA  c                 C   s2   t | |}t| tstt|ts&t|j| _| S rR   )r@  rX   r1   rr   r   rP   r   valrJ   rJ   rK   squeeze_  s
    
rD  c                 C   s2   t | rt| dtjdS td}t|tjd| S )NFrn   isinfr  ry   	full_liker]   rz   r,   r   rP   rV   rJ   rJ   rK   rE    s    rE  c                 C   s2   t | rt| dtjdS td}t|tjd| S )NFrn   isnanr  rF  rH  rJ   rJ   rK   rI    s    rI  c                 C   s$   t | rt| S td}t|| S )Nceilry   r  r,   r   rH  rJ   rJ   rK   rJ    s    rJ  c                 C   s$   t | rt| S td}t|| S )NfloorrK  rH  rJ   rJ   rK   rL  '  s    rL  c                 C   s(   t | rt| S td}t|| S d S )NroundrK  rH  rJ   rJ   rK   rM  /  s    rM  c                 C   s$   t | rt| S td}t|| S )NtruncrK  rH  rJ   rJ   rK   rN  8  s    rN  c                 C   s   ddl m} t| g\} t| tjr4t| t|S t| t	sBt
t|ttfsTt
t|  t|krl| S ||  stjjt|  }|dkr||s| tjjt||  t	t| jt|S )Nr   )free_unbacked_symbols)%torch.fx.experimental.symbolic_shapesrO  r   rX   r%   r   r)   r   rZ   r1   rr   rY   r   r>   r   r   	size_hintr;   
mark_reuser   )rP   r7  rO  Zx_size_productrJ   rJ   rK   r   @  s    r   c                 C   sL   t |}|D ]}d||< q| }t|D ]\}}|dkr&t||}q&t||S Nr   )rY   r   	unsqueezer   )r   shapeZbroadcast_dimensionsr?  Zbroadcast_dimensionvidxrP   rJ   rJ   rK   broadcast_in_dimZ  s    
rX  c                 C   s   t | | S rR   )r   r   )rP   r   rJ   rJ   rK   	expand_ash  s    rY  c                    sV  t |   tt krJtdgtt      t| t  } tt|  ksbtt |  }d}ttD ](}| dkrd}|| |  ||< q~|rt|| 	 | 
 dS tdd t D rtt| |S  fdd	}tjjt }|dkr0| tjjt||  |  tj| 
 | 	 |t |d
S )Nr"   Fr   Tro   r   c                 s   s"   | ]\}}|d kp|d kV  qdS r4  rJ   r5  rJ   rJ   rK   rW     s     zrepeat.<locals>.<genexpr>c                    sv   t | t kstt| } tt D ]D}| dkr( | dkrTtd| |< q(t| | d | | |< q(| S )Nr"   r   )r   rr   rY   r   ru   r   r   )r   r   Zold_sizerepeatsx_loaderrJ   rK   r     s    zrepeat.<locals>.inner_fnr   )rY   r   r   ru   r   r  rr   r   emptyrt   r   r   r   r  r   r>   r   r   rQ  r;   rR  r   r.   r   )rP   r\  new_sizeZzero_tensorr   r   Zold_size_productrJ   r[  rK   repeatm  s8     
r`  c                 C   s2   t | tstt |ttfs ttt| j|S rR   )rX   r1   rr   rY   rZ   r3   r   r   )rP   r7  rJ   rJ   rK   r    s    r  c                 C   s6   t | tstt |ttfs ttt| jt|S rR   )rX   r1   rr   rY   rZ   r-   r   r   )rP   r<  rJ   rJ   rK   permute  s    ra              c              	   C   s8   t | tstt| |d}ttjj| j|||||dS )Nr   clamp)rX   r1   rr   _validate_dimr%   	SliceViewr   r   )rP   r   startendsteprd  rJ   rJ   rK   slice_  s    rj  c              	   C   s   t | tr"t | jtjr"| j } |   t| sDtd|  dt	| \}}t
|j|jdd |D dd |D t|pd}tt||S )Nzunrealized as_strided(z, ...)c                 S   s   g | ]}t |qS rJ   ru   r   rO   r?  rJ   rJ   rK   rQ     s     zas_strided.<locals>.<listcomp>c                 S   s   g | ]}t |qS rJ   rk  rl  rJ   rJ   rK   rQ     s     r   )rX   r1   r   r%   BaseViewunwrap_viewr   is_storage_and_layoutrG   as_storage_and_layoutFixedLayoutr   ro   ru   r   ReinterpretView)rP   sizestridestorage_offsetstorageZ
old_layoutZ
new_layoutrJ   rJ   rK   
as_strided  s    

rw  c                 C   s$   t | tstt| |||j| _| S rR   )rX   r1   rr   rw  r   )rP   rs  rt  ru  rJ   rJ   rK   as_strided_  s    rx  c                 C   s   t | |||}t|S rR   )rw  r  )rP   rs  rt  ru  resultrJ   rJ   rK   as_strided_copy  s    rz  c                    s   g d}D ]*} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r   c                 S   s   g | ]}|  qS rJ   r   rO   r   rJ   rJ   rK   rQ     s     z!pointwise_cat.<locals>.<listcomp>c           
   	      s>  t |  tj}g }g }ttD ]؉  dkr@t dtjnt   d tj}t   d tj}t ||}t ||} dkr|}n" td kr|}nt 	||}|
| t|     d 8  < |
t | fddd q&|d }	ttd ddD ] t |  |  |	}	q|	S )Nr   r"   c                      s     S rR   rJ   rJ   )r   idx_loadinputs_loadersrJ   rK   <lambda>       z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        r   r    )r=   
index_exprr]   int64r   r   constantgeltand_r   rY   maskedr2  )
rW  idx_dimZmasksZmasked_loadsrg  rh  Z
start_condZend_condmaskZnext_valr   r   r}  Zinputs_ranges)r   r|  rK   r     sD    
zpointwise_cat.<locals>.inner_fnr   )r   r   rY   r.   r   r   rt   )r   r   Zprev_endr   r   r_  rJ   r  rK   pointwise_cat  s    .

r  )r   scaleszero_pointsaxis	quant_min	quant_maxro   returnc              	      s   t  dkstdt  dks0td|  tjkrJt| tj} |  tjksjtd|    t |  k stdt |   |     f	dd}t	j
|  ||  dS )	Nr"   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
         s   |   f}| }|}|}t tjd\}}jtjkrPt|tj}jtjkrjt|tj}t|}t|| | }t	|t
||}	t|	S Nrn   )_create_constantsr]   float32ro   r=   r   r.  
reciprocalrM  maximumminimum)
rW  channel_idxr   scale
zero_pointqminqmax	inv_scalerC  clamped	r  ro   input_loaderr  r  r  scales_loaderr  zero_points_loaderrJ   rK   r   3  s    

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnr   )r   r   rr   rt   r]   bfloat16r   r  r   r.   r   r   r   r  r  r  r  r  ro   r   rJ   r  rK   )quantized_decomposed_quantize_per_channel  s.    
r  c                    s   t  dkstdt  dks0td|  |ksTtd| d|    t |  k sztdt |   |     fdd}tj|  tj	||  d	S )
Nr"   r  r  Expecting input to have dtype , but got dtype: r  c                    st   |   f}| }|}|}j tjkr<t|tj}j tjkrVt|tj}tt|tj|| }|S rR   )ro   r]   r  r=   r   sub)rW  r  r   r  r  rC  r  r  r  r  r  r  rJ   rK   r   e  s    
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnr   
r   r   rr   rt   r   r.   r   r   r]   r  r  rJ   r  rK   +quantized_decomposed_dequantize_per_channelL  s*    
r  )r   r  r  r  r  ro   r  c                    s   |   tjkrt| tj} |   tjks:td|    |   fdd}tj| 	  t
j|t|t|d|  dS )Nr  c           	         sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?rn   )r  r]   r  r=   rM  r  r  r   )	rW  r  r  r   r  rC  r  r  r  ro   r  r  r  rJ   rK   r     s      
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnr  r  r   )rt   r]   r  r   r  rr   r   r.   r   r   r   r   r   rp   r   r   r  r  r  r  ro   r   rJ   r  rK   0quantized_decomposed_quantize_per_tensor_default{  s$    
  r  c                    sh   |   |ks$td| d|    |    fdd}tj|  tjtj	|t
|t|d|  dS )Nr  r  c                    s:    | }t ||tjd\}}tt|tj|| }|S r  )r  r]   r  r=   r  r   )rW  r  r  r   rC  r  rJ   rK   r     s    zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnr  r   )rt   rr   r   r.   r   r   r]   r  r   r   r   rp   r   r  rJ   r  rK   2quantized_decomposed_dequantize_per_tensor_default  s     
  r  c                    s   |   tjkrt| tj} |   tjks:td|    t dksrt dkrj d dksrtdt dkst dkr d dkstd|     fdd}t	j
|   ||  dS )	Nr  r   r"   expect scale as scalar tensor"expect zero_point as scalar tensorc                    s   | }t  dkrdnd}t  dkr:dnd}jtjkrZt|tj}jtjkrtt|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr"   r   rJ   rn   )r   r   ro   r]   r  r=   r   rM  r  r  r  r  )rW  r   _scale_zero_pointrC  r  r  r  ro   r  r  r  r  scale_loaderr  zero_point_loaderrJ   rK   r     s    zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnr   )rt   r]   r  r   r  rr   r   r   r   r.   r   r   r  rJ   r  rK   /quantized_decomposed_quantize_per_tensor_tensor  s<    r  c                    s   t  dks8t  dkr0 d dks8tdt  dkspt  dkrh d dksptd|  |kstd| d|   |      fdd}tj|  tj	||  d	S )
Nr   r"   r  r  r  r  c                    s    | }t  dkrdnd}t  dkr:dnd}jtjkrZt|tj}jtjkrtt|tj}tt|tj|| }|S )Nr"   r  rJ   )r   r   ro   r]   r  r=   r   r  )rW  r   r  r  rC  r  r  r  r  r  rJ   rK   r     s    zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnr   r  r  rJ   r  rK   1quantized_decomposed_dequantize_per_tensor_tensor  s8    
r  c           
         s  | d   jdk}|rrtdd | D rr| D ]}|  q,tdd | D r`ttjf|  \} }ttjj| |S t	| dkrt
| d S t| d |d}t| dtjifdd	| D } tttjf tjd
dddd fddtfdd| D }tdfdd|r2ttj| |S fddd}d t	| |kst	| tjkrt fdd| D rtdd tjjD }tfdd| D o|}tfdd| D otfdd| D  }	|s|	r|st| |S ttj| |S )Nr   cpuc                 s   s"   | ]}|  tjtjfkV  qd S rR   )rt   r]   int8uint8rO   r   rJ   rJ   rK   rW   "  s    zcat.<locals>.<genexpr>c                 s   s   | ]}t | d kV  qdS )re   N)r   r   r  rJ   rJ   rK   rW   )  s     r"   r}   c                    s   g | ]}t | qS rJ   r   r{  rn   rJ   rK   rQ   4  s     zcat.<locals>.<listcomp>rP   r  c                 S   s>   t | tr(t | jtjr"| j S | jS t | tjr:| jS | S rR   )rX   r1   r   r%   rm  rn  
StorageBoxrx   rJ   rJ   rK   unwrap_tensor6  s    

zcat.<locals>.unwrap_tensorc                 S   s   t | tjot | jtjS rR   )rX   r%   ComputedBufferr   r/   r   rJ   rJ   rK   is_reductionB  s    zcat.<locals>.is_reductionc                    sJ   t | ttjfr | S | pHt | tjoHt fdd|  D S )Nc                 3   s   | ]} t j|V  qd S rR   )r>   r   
get_buffer)rO   readcan_fuse_reductionrJ   rK   rW   K  s   z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)rX   r1   r%   r  r.   r   get_read_namesr  )r  r  r  rJ   rK   r  E  s    zcat.<locals>.can_fuse_reductionc                 3   s   | ]} |V  qd S rR   rJ   rO   r   r  rJ   rK   rW   R  s     )r  c                    sZ   t | r*t j| dd\}}t j| S t| tt jfrF | S t| t jrVdS dS )NF)freezeT)	r%   ro  rp  ConcatKernelZcan_realize_into_without_copyrX   r1   r  r.   )rP   rv  r   )should_lower_cat_inputr  rJ   rK   r  T  s    
z#cat.<locals>.should_lower_cat_inputc                    sZ   t | ttjfr | S t | tjs,dS |  }|  D ]}| tj	|7 }q<|S r   )
rX   r1   r%   r  r.   Zinner_fn_opcountr  r>   r   r  )rP   countr  )op_countr  rJ   rK   r  h  s    zcat.<locals>.op_countri   r    c                 3   s   | ]}| kV  qd S rR   rJ   r  )MAX_SIMPLE_OP_COUNTr  rJ   rK   rW     s     c                 s   s   | ]}t |V  qd S rR   )r8   )rO   ZuserJ   rJ   rK   rW     s     c                 3   s   | ]} |V  qd S rR   rJ   r{  r  rJ   rK   rW     s     c                 3   s   | ]} |V  qd S rR   rJ   r{  r  rJ   rK   rW     s    c                 3   s   | ]} |V  qd S rR   rJ   r  r  rJ   rK   rW     s     )r   r   r   r   require_channels_lastr  catr  r  r   r  re  r   r   r   r
   r1   r%   r  IRNoder   rz   r  r   r#   Zmax_pointwise_cat_inputsr>   r   r   r  )
r   r   Z
cpu_devicer   r   Zfusable_reductionZMAX_COMPLEX_POINTWISE_CATZpointwise_usesZfuse_pointwise_useZhorizontal_fuse_catrJ   )r  r  ro   r  r  r  r  rK   r    sX    


r  )offsetdim1dim2c                    s  |   ttdtdtkfdd tjjt	|d}|rtjj
tjj |  d}n(tjj
tjj  | d}d |r| df nd|f fddtD }||  fdd	}ttj| ||S )
N)rW  rankc                      s   d  d S )Nz(diagonal dimensions cannot be identical z, rJ   rJ   r  r  rJ   rK   r~    r  zdiagonal.<locals>.<lambda>r   )r   r   c                    s    g | ]\}}| fkr|qS rJ   rJ   )rO   r   r?  r  rJ   rK   rQ     s      zdiagonal.<locals>.<listcomp>c                    s   | d }dgt  }d}tD ]L}|kr@| d  ||< q"|krZ| d  ||< q"| | ||< |d7 }q"|t d kst|S )Nr   r   r"   r    )r   r   rr   )rW  Zdiag_idxZoriginal_idxZcur_dimr>  Zbase_idxr  r  Znum_dimsZoriginal_shaperJ   rK   	reindexer  s    
zdiagonal.<locals>.reindexer)r   r   r   r   r>   r   r   r:  ru   LtZevaluate_maxZevaluate_minr   r   r1   r%   GenericViewr   )r   r  r  r  Zoffset_negativeZ	diag_sizer7  r  rJ   r  rK   diagonal  s@     
  

r  c                 C   s   t t| |||S rR   )r  r  )r   r  r  r  rJ   rJ   rK   diagonal_copy  s    r  c                 C   s$   t | }t||||}t|| |S rR   )r  r  	mutate_to)r   srcr  r  r  r   r   rJ   rJ   rK   diagonal_scatter  s    
r  c                 C   s,   t ||  | }tt| |||d |S r*  )r3   Zhandle_negative_indexr   r@  rj  )rP   r   rW  rJ   rJ   rK   select  s    r  c           	   
   C   s   t | |d}t|tjr&tjj|}t|ttj	frbtjj| 
 | }|g|| d |  }g }d}|D ](}|| }|t| ||||d |}qn|S )Nr   r"   rc  )re  rX   ru   rv   r>   r   r   evaluate_static_shaperp   r   r   r   rj  )	rP   r7  r   rd  x_sizery  rg  rs  rh  rJ   rJ   rK   split  s    r  c                 C   s   t | ||ddS )NFrc  )r  )rP   r7  r   rJ   rJ   rK   split_with_sizes  s    r  c                 C   sJ   t | |d}tjj|  | }g }t|D ]}|t| || q.|S r   )	re  r>   r   r   r  r   r   r   r  )rP   r   r  ry  r   rJ   rJ   rK   unbind  s    r  c                    s   |   }t|}t|| |dkr4tt| d|dS |  }tjj}||| |	d t
|| d }||dkr| |t|| | |d   |f| d d  |f}	 fdd}
ttj| |	|
S )Nr   )rh  r"   c                    s4   | d |     }| d   |f|  d d S )Nr   r"   rJ   )rW  Zdim_idxr   ri  rJ   rK   r    s    zunfold.<locals>.reindexer)r   r   r   rj  rT  r>   r   r   	guard_leqguard_ltr   rQ  rR  r   r1   r%   r  r   )rP   	dimensionrs  ri  r7  ndimdim_sizer   Znew_dim_sizeZout_sizer  rJ   r  rK   unfold  s    
$r  c                 C   s4   t | |d}t|  }||td t| |S r*  )re  rY   r   insertru   r   r  )rP   r   r=  rJ   rJ   rK   rT    s    rT  c                 C   s2   t | |}t| tstt|ts&t|j| _| S rR   )rT  rX   r1   rr   r   rB  rJ   rJ   rK   
unsqueeze_"  s
    
r  c                 C   sP   t |tstt|  }|dk r.||| 7 }d|  krF|| k sLn t|S r   )rX   rp   rr   r   r   )rP   r   r  r  rJ   rJ   rK   re  +  s    re  r   c                 C   sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r    )	re  r>   r   r   r  r   rj  r   sigmoid)rP   r   Znew_lenr   r   rJ   rJ   rK   glu4  s
    r  c                    s   |rt    fdd}|S )Nc                     s   t tjtjj f| |S rR   )pytreetree_mapr1   r   r%   FallbackKernelr'  kernelrJ   rK   handlerB  s     z!fallback_handler.<locals>.handler)rA   r\   )r   r	  r  rJ   r  rK   r  >  s    
r  c                   C   s   t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnrJ   rJ   rJ   rK   _warn_complex_not_supportedJ  s    r  r  c                 C   s<   |   r8|r.|jtjjjjtjjjj	fkr.dS t
  dS dS )z0Do not support reading or writing to this tensorFT)r  r   r]   r=   r  r  ro   r  r  r  r  r   parentrJ   rJ   rK   unsupported_input_tensorS  s    

r  c                 C   s   t | |rdS | jotjS )z2Do not support writing tensor but can read from itT)r  Zis_cpur#   Zdisable_cpp_codegenr  rJ   rJ   rK   unsupported_output_tensora  s    
r  )r   c                 C   sd   | j tjjkrdS | j tjjkr$dS dd }tj| j| jD ]}||| ddr< dS q<|| | ddS )NFc                 S   sp   t | tjjsdS d| jkr dS t| jd D ]:}t |tjjsDq0|rZt	||rj dS q0t
||r0 dS q0dS )NFrC  T)rX   r]   fxNodemetar  tree_leaves_subclasses
FakeTensorr  r  )r   r  	is_outputr  rJ   rJ   rK   check_skip_conditionq  s    


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)r  T)	r   r  view_as_complexr  lift_fresh_copyr  Zarg_tree_leavesr   r   )r   Zallow_cpu_inputsr  r   rJ   rJ   rK   %fallback_node_due_to_unsupported_typeh  s    r  c                    s   | t kstd|  |rtttdrtt| grttjrF| tj	j
jksttjjjrddtjj_td td|  d fdd}t| tjjr|  D ]}t| |}|| qn8t| tjjtjjfr||  ntd	|  d
t|  d S )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                    s.   t |   d k	rt|   t| d dt| S Nr|   )rM   rc   r   r  )op_overloadlayout_constraintrJ   rK   register_fallback  s    

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r'   rr   rz   osgetenvr(   r#   fallback_randomr]   _decompZdecompositions_for_rngZextra_random_decompsZ_dynamoZsuppress_errorslogwarningrX   r^   r_   ra   rS   
OpOverloadZHigherOrderOperatorRuntimeErrorr   )r   r  r  r  olr  rJ   r  rK   make_fallback  s6    




r#  c                 C   s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r"   rn   tensorr]   r  )rU  Znumelr?  rJ   rJ   rK   philox_rand_offset  s    
r&  c           	         sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                    sV   t g tj}t g tj}t t | tj|}t ||}t | S rR   )r=   r   r]   r.  r\   r  rand)r   Zseed_index_exprZoffset_index_exprZrand_index_exprry  ro   Zoffset_loader
random_posseed_loaderrJ   rK   r     s     zphilox_rand.<locals>.inner_fnr   )
r%   rq  FlexibleLayoutcontiguous_stridesmake_indexerr   r.   r   rY   r&  )	rs  seedr  rt  r   ro   r   Zrandom_values_nodeZoffset_noderJ   r(  rK   philox_rand  s"    
r/  c              	   C   s2   t jr&ttjtjtj	j
| ||S tdd S )Nz&should be handled in replace_random.py)r#   r  r  r  r1   r   r%   r  r  native_dropoutr  rr   )rP   ptrainrJ   rJ   rK   r0    s    r0  c                 G   sh   t js |  tdks td|   t|dksBt|d t	rJt
jj	nt
jj}tj|| f|  | S )Nr  Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r#   r  r   r]   r   rr   r   r   rX   r   r  
bernoulli_Tensorr%   ZInplaceBernoulliFallback)rP   r   r  rJ   rJ   rK   r4    s    

r4  c                 G   s2   t js |  tdks tdtt| f| S )Nr  r3  )r#   r  r   r]   r   rr   r4  r  )rP   r   rJ   rJ   rK   bernoulli_p	  s    r6  c                 C   s   t d S rR   rr   r   rJ   rJ   rK   _foobar  s    r9  c                 C   s   t d d S )Nz1using triton random, expect difference from eager)r  info)saltrJ   rJ   rK   _warn_triton_random  s    r<  c                   C   s   t tjj d S rR   )r<  r>   r   Zcreation_timerJ   rJ   rJ   rK   warn_triton_random  s    r=  c                  O   sB   | dd d k	rt| |S tjr6|dd  t| |S tdd S N	generatorz-should have been handled in replace_random.py)getfallback_rand_generatorr#   r  popfallback_rand_defaultrr   r'  rJ   rJ   rK   r'  (  s    

r'  c                  O   sB   | dd d k	rt| |S tjr6|dd  t| |S tdd S r>  )r@  fallback_randn_generatorr#   r  rB  fallback_randn_defaultrr   r'  rJ   rJ   rK   randn2  s    

rF  c                 C   s   t |}t j| |S rR   )r%   get_stride_orderExternKernelrequire_stride_order)Zinput_tensorrt  stride_orderrJ   rJ   rK   inductor_force_stride_order<  s    
rK  r   c                 C   s   t dd S )Nz.should be handled in fuse_seed_creation_pass()r7  rL  rJ   rJ   rK   inductor_seedB  s    rM  c                 C   s   t   tt| t|S rR   )r=  r1   r   r%   ZRandomSeedsr5   )r  r   rJ   rJ   rK   inductor_seedsG  s    rN  c                    s(    fdd}t j  |g dS )Nc                    s   t   S rR   )r=   Z	load_seedget_namer8  r   seedsrJ   rK   r   O  s    z&inductor_lookup_seed.<locals>.inner_fnr   )r.   r   r   rt   )rQ  r   r   rJ   rP  rK   inductor_lookup_seedM  s    rR  r  )rs  r.  moder  c                   s   t jr
t dkst| } tj}| }tj||| tj	| |d
 |  fdd}tj|||| d}|  |S )N)r'  rF  rS  c                    s"   t t g t| tjS rR   )rS   r=   r  r]   r.  r   rT  r)  r*  rJ   rK   r   f  s    z!inductor_random.<locals>.inner_fnr   )r#   r  rr   r]   r  r   r%   rq  r+  r,  r-  r   r.   r   r   )rs  r.  rT  r  ro   r   r   ry  rJ   rU  rK   inductor_randomZ  s,    
   
 
rV  )lowhighrs  r.  r  c                   sl   t jr
t|}tj}| }tj|||tj	||d
 |  fdd}tj||||dS )NrS  c              	      s6   t g t | tjt tjt  tjS rR   )r=   Z	randint64r  r]   r.  r  r   rX  rW  r)  r*  rJ   rK   r     s    z"inductor_randint.<locals>.inner_fnr   )r#   r  rr   r]   r  r   r%   rq  r+  r,  r-  r   r.   r   )rW  rX  rs  r.  r  ro   r   r   rJ   rY  rK   inductor_randintv  s&    
   
 
rZ  	out_int32right)r   
boundariesr\  r]  c                   s   t   dkstt| r$t s@ttjjdd|  |dS      d  	 }| 
 }| 	 |rvtjntj fdd}tj|||  dS )	Nr"   Fr  r[  r   c                    s"   | }t |  }|S rR   )r=   	bucketizerO  )r   rC  r   r^  Zboundaries_sizeZindex_dtyper  r]  rJ   rK   r     s    zbucketize.<locals>.inner_fnr   )r   r   rr   r+   r  r  r_  r5  r   r   r   r]   r.  r  r.   r   )r   r^  r\  r]  Zboundaries_loaderr   r   rJ   r`  rK   r_    s*       r_  c                 O   s$   t tjtjj||f\}}||fS rR   )r  tree_map_onlyr%   r  rH  Zrequire_stride1r   r   r   rJ   rJ   rK   require_dense  s      rc  c                 O   s$   t tjtjj||f\}}||fS rR   )r  ra  r%   r  rH  require_contiguousrb  rJ   rJ   rK   rd    s      rd  c                 O   s$   t tjtjj||f\}}||fS rR   )r  ra  r%   r  rH  r  rb  rJ   rJ   rK   r    s      r  )ignore_mutated_args_FIXMEc                   s  dd  |rt jtjjs tjj} fdd}g }i }tt|j	D ](\}\}	}
|j
| }||||	|
 qLdd |j
D }| D ].}|| }	j| }
|| }|||	|
||< qt||fS t fddt|j	D } fd	d| D }||fS )
Nc                 S   s2   t | tjr.t|jd  }tj| |S | S NrC  )rX   r%   r  rG  r  rt  rH  rI  )r   fx_argrJ  rJ   rJ   rK   apply_constraint  s    z1constrain_to_fx_strides.<locals>.apply_constraintc                    s    | j d k	r| j jr|S  ||S rR   )Z
alias_infoZis_write)
schema_argr   rg  rh  rJ   rK   maybe_apply_constraint  s    z7constrain_to_fx_strides.<locals>.maybe_apply_constraintc                 S   s   i | ]}|j |qS rJ   )r#  r   rJ   rJ   rK   
<dictcomp>  s      z+constrain_to_fx_strides.<locals>.<dictcomp>c                 3   s   | ]\}} ||V  qd S rR   rJ   rO   r   rg  rj  rJ   rK   rW     s    z*constrain_to_fx_strides.<locals>.<genexpr>c                    s"   i | ]\}}| |j | qS rJ   r   rO   krV  rh  fx_noderJ   rK   rl     s      )rX   r   r]   r^   r   rr   Z_schemar   r   r   	argumentsr   r   r   rZ   r   )rr  re  r   r   Zschemark  new_args
new_kwargsrW  r   rg  ri  Zschema_kwargskeyrJ   rq  rK   constrain_to_fx_strides  s,    


rw  ztorchvision::roi_alignc                    sF   dd  t  fddt|jD } fdd| D }||fS )Nc                    s4  t | tjs| S |jd }| }t|}|rT|d dkrTtttt	| 
 }|jshtj| |S d t | tsztt	| 
 dkr| S  fdd}z,|   || rtjtj| |W S W n tk
r   Y nX  fdd	}t | jtjr&|| s&||  r&tjtj| |S tj| |S )
NrC  r   r   ri   rd   re   c                    sF   t  fddtt  d D }tjj  d dkoD|S )Nc                 3   s,   | ]$}t jj |   d kV  qdS r   N)r>   r   r   rQ  
get_strider   )	ALIGNMENTrP   rJ   rK   rW   &  s   z`sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_realized_tensor.<locals>.<genexpr>r"   r   )r   r   r   rz  r>   r   r   rQ  )rP   Zaligned_stridesr{  rx   rK   is_aligned_realized_tensor%  s    zMsdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_realized_tensorc                    s   t jj|  d   dkS )Nr   r   )r>   r   r   rQ  r   rx   r|  rJ   rK   
is_aligned7  s    z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)rX   r%   r  r  rt  rG  rY   r   r   r   r   r   rH  rI  r1   rr   rz  r>   r   Ztry_match_insignificant_stridesrealize_inputAttributeErrorr   rm  rn  )r   rg  Zmeta_valZmeta_striderJ  r}  r~  rJ   r|  rK   rh    s@    

	
 


 z)sdpa_constraint.<locals>.apply_constraintc                 3   s   | ]\}} ||V  qd S rR   rJ   rm  rj  rJ   rK   rW   C  s    z"sdpa_constraint.<locals>.<genexpr>c                    s"   i | ]\}}| |j | qS rJ   rn  ro  rq  rJ   rK   rl  F  s      z#sdpa_constraint.<locals>.<dictcomp>)rZ   r   r   r   )rr  r   r   rJ   rq  rK   sdpa_constraint  s    5
r  )r  c                 C   sn   |}|   |  kr"t||   }|  | kr@t||  }|  | krft||  }t|S t|S rR   )r   r   rt   r   r   r   r  )selfr  non_blockingrP   r   rJ   rJ   rK   r  	  s    r  )memory_formatc                C   s&   t j|  |  |  t|  dS )Nr   )r.   r   r   rt   r   rY   r   )rP   r  rJ   rJ   rK   r  	  s    
r  c                 C   s   g }t | trLt | jtjrL| j} t | tjrD||   | j} q"t| } t| } |r| j} |d d d D ]}t| |} qlt| } | S rS  )rX   r1   r   r%   rr  r   Z
get_layoutr  )rP   Zreinterpret_view_layoutslayoutrJ   rJ   rK   clone_preserve_reinterpret_view!	  s    r  r  c                   s(    fdd}t jt| || gdS )Nc                    s   t j| d    dS )Nr   rn   r=   r  r   ro   rg  ri  rJ   rK   rV   C	  s    ziota.<locals>.fnr   )r.   r   r5   )lengthrg  ri  ro   r   Zrequires_gradrV   rJ   r  rK   iota9	  s    
r  )r   r   c                    s   |   |  kst|  t|  d tjjt	drN| 
    tjjd tjj| 
    tt| | 
 }|  fdd}tj|  |   |t| 
 dS )Nr   c              	      s6   t t t |   tjt tj| | S rR   )r=   r2  eqr  r]   r.  rW  r   r   
src_loaderr]  rJ   rK   r   Z	  s    z select_scatter.<locals>.inner_fnr   )rt   rr   r   re  r>   r   r   r:  ru   r  r   r  r  r   rT  r.   r   r   rY   )rP   r  r   r   r   rJ   r  rK   select_scatterN	  s     

r  c                    s     |  kst t d    tj \t }t	 d  | < t
||}|  fdd}tj   |t dS )Nr   r"   c              
      s4  dkr kr dkr | S t |  tj}t|  t|    < g }dkr~|t |t t	tj kr|t 
|t t	tj dkr|t t t|   dtjt dtj |sttt j|}t | fddtrdnd}t ||| S )Nr   r"   c                      s    S rR   rJ   rJ   )src_idxr  rJ   rK   r~  	  r  z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )r=   r  r]   r  rY   r   r   r  ru   r   r  r  r   r  rr   r   r6  r  r  ry   r2  )rW  r  r  Zsrc_valr   r  rh  r  rg  ri  rP   r]  )r  rK   r   z	  sT     zslice_scatter.<locals>.inner_fnr   )rt   rr   r   re  r   r%   rf  Znormalize_start_endrY   r   r   r.   r   r   )rP   r  r   rg  rh  ri  Zsrc_sizer   rJ   r  rK   slice_scatterl	  s     
.
r  c                 C   s*   t | ttfr&t| dkr&t| d S | S r   )rX   rY   rZ   r   _unwraprx   rJ   rJ   rK   r  	  s    r  ro   r   r  
pin_memoryc                   s  t |d tjfkd|  t | d tt tr@p<tjnpJt g }t tj	rl fdd}nt t
tfr fdd}nft dkst d t
tfrt dkr|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r  c                    s   t  S rR   r  r   r   ro   rJ   rK   r   	  s    ztensor.<locals>.inner_fnc                    s   t  S rR   r=   r  r   r  rJ   rK   r   	  s    r   ri   c                    s8    fdd t dkr*tdS  dt S )Nc              	      sr   | |k st ||  dkr(t|  S ||  d |  }tttd tjt|tj | | ||S )Nr"   r    r   )rr   r=   r  r2  r  r  r]   r  )rg  rh  mid)binary_searchr   ro   r   rJ   rK   r  	  s    z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   r=   r  r   r  )r  r   rK   r   	  s    rZ  r   )rL   r]   stridedrX   r  rp   r  get_default_dtyperu   rv   r   r   r   r   r>   r   Zadd_tensor_constantr%  r.   r   r5   )r   ro   r   r  r  r   r   rJ   r  rK   r%  	  s,    *r%  c                 C   s@   t | tr2|d k	rt| |} |d k	r.t| |} | S t| ||dS )NrZ  )rX   r1   r   r   r%  )r   ro   r   rJ   rJ   rK   	as_tensor	  s    


r  c                 C   s   t | tjdS r  r$  r   rJ   rJ   rK   long_tensor	  s    r  c                 C   s   ddl m} |tjjjtjjjd }t|dks:t	|t
t| \}}t||| }tj||_tjjjd }t|tjtjtjfr|jjS t|S d S )Nr   )resolve_unbacked_bindingsunbacked_bindingsr"   rC  )rP  r  r>   r   r   Z	shape_envr   r  r   rr   r   iterr   r%   ZDynamicScalarregister_bufferr#  rX   r]   SymIntZSymFloatZSymBoolr   exprru   sympify)r   r  r  Zbinding_symZkeypathbufferrC  rJ   rJ   rK   _local_scalar_dense	  s     r  c                 C   s   d S rR   rJ   )r   rI   rJ   rJ   rK   _assert_scalar,
  s    r  c                    s   | t | ttfs"tdr"jt ttfr@ fdd}nDt tjr\ fdd}n(t dkspt	
 fdd}tj| |t|dS )Nr   c                    s   t  S rR   r  r   ro   r   rJ   rK   r   <
  s    z_full.<locals>.inner_fnc                    s   t  S rR   r  r   r  rJ   rK   r   A
  s    r   c                    s    g S rR   rJ   r   )value_loaderrJ   rK   r   H
  s    r   )rX   rp   r   r~   r   ru   rv   r   r   rr   r   r.   r   rY   )
fill_valuer   ro   rs  r   rJ   )ro   r   r  rK   _full5
  s     r  c                 K   s   t t|| f|S rR   create_tensor_liketensor_constructor)rP   r  r   rJ   rJ   rK   rG  S
  s    rG  c                    s    d d d d dd d fdd
}|S )NF)namesro   r   r  r  r  c                    s   t | d kd t |d tjfkd|  t | d t|}|pFt }t|dkrvt|d tttj	frvt|d }|D ]}t|tj
rztqzdd |D }t |||S )Nnamed tensorsr  r  r"   r   c                 S   s   g | ]}t |qS rJ   rk  rl  rJ   rJ   rK   rQ   n
  s     z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)rL   r]   r  r5   r  r   rX   rY   rZ   Sizer  rr   r  )r  ro   r   r  r  r  rs  r?  r  rJ   rK   r   Z
  s    	"z!tensor_constructor.<locals>.innerrJ   )r  r   rJ   r  rK   r  X
  s    r  )r  ro   r  r   r  r  c                 G   sX   t | d kd t|}t|dkrDt|d tttjfrDt|d }t|d ||||dS )Nr  r"   r   ro   r  r   r  )	rL   r5   r   rX   rY   rZ   r]   r  empty_strided)r  ro   r  r   r  r  rs  rJ   rJ   rK   r^  t
  s    
"     r^  c                    s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)ro   r   r  r  r  c                   sj   t | d t |d tjfkd|  |d kr8|  }nt|}|pJ|  }t|  } |||||dS )Nr  r  r  )rL   r]   r  rt   rs   r   rY   r   )rP   ro   r   r  r  r  rs  creation_fnrJ   rK   _constant_like
  s    
    z*create_tensor_like.<locals>._constant_likerJ   )r  r  rJ   r  rK   r  
  s        r  c                 C   s   t t| S rR   r  r  rJ   rJ   rK   r3  
  s    r3  c                    s   d d d d d fdd
}|S )Nr  c                   sp   t |ttfstt| d t|d tjfkd|  t|pF|  }|pR| 	 }dd |D }t
 |||S )Nr  r  c                 S   s   g | ]}t |qS rJ   )ru   r   rl  rJ   rJ   rK   rQ   
  s     z7new_constant.<locals>._new_constant.<locals>.<listcomp>)rX   rY   rZ   rr   rL   r]   r  rs   rt   r   r  rP   rs  ro   r  r   r  r  rJ   rK   _new_constant
  s    z#new_constant.<locals>._new_constantrJ   )r  r  rJ   r  rK   new_constant
  s       r  r  c                C   s4   |d kr|   }|d kr |  }t|d ||||dS Nr  rt   r   r  r  rJ   rJ   rK   	new_empty
  s         r  c                C   s   t | ttfstt |tttd fs*tt| d t|d tjfkd|  t|p^t	 }|pnt
dj}td||| d}|  |jj}dgt|  |j_t |tjstdd | D } |rdd |D n
tj| }tj||| |d	|_|S )
Nr  r  r  r   )r  r   ro   rs  c                 S   s   g | ]}t |qS rJ   rk  rl  rJ   rJ   rK   rQ   
  s     z!empty_strided.<locals>.<listcomp>c                 S   s   g | ]}t |qS rJ   rk  rl  rJ   rJ   rK   rQ   
  s     )r   ro   rs  rt  )rX   rY   rZ   rr   r   rL   r]   r  rs   r  r%  r   r  r   r   r   r   r%   r  r+  r,  rq  r  )rs  rt  ro   r  r   r  Z	pointwiser  rJ   rJ   rK   r  
  s.    
r  c                C   s4   |d kr|   }|d kr |  }t||||||dS r  r  )rP   rs  rt  ro   r  r   r  rJ   rJ   rK   new_empty_strided
  s         r  c                 C   s2   dd |D }t tt||jd}tj| |S )Nc                 S   s   g | ]}t jj|qS rJ   )r>   r   r   rQ  rl  rJ   rJ   rK   rQ   
  s     z copy_strided.<locals>.<listcomp>)rv  )sortedr   r   __getitem__r%   rH  rI  )rP   rt  rJ  rJ   rJ   rK   copy_strided
  s    r  c                 K   s&   | dd k	stdt|| f|S )Nro   z(dtype should be handled by decomposition)r@  rr   r  )rs  r  r   rJ   rJ   rK   full
  s    r  c                    s   t | tst| tjks t|  tdk}t|  | | 	 |	  fdd}t
j|  |  || dS )Nr   c                    s4   t | } t| dkr,t|   |  < | S r   )rY   r   r=   indirect_indexingr  r   index_loaderrs  r]  rJ   rK   rV     s    zgather.<locals>.fnr   )rX   r1   rr   rt   r]   r  r   r   re  r   r.   r   r   )rP   r   r   Zsparse_gradr  rV   rJ   r  rK   gather
  s    r  c                    s   |rt t| tst t|ts$t dt| ks8t |  |  t| |  | dd   fdd}tj	| 
 |  |dS )Nrp   r"   c                    sZ   t | t ks"t|  d  | d  }t|d g| d   }|S )Nz != r   )r   rr   r=   r  )rW  Z	var_indexZ
weight_idxindices_loaderZindices_ndimr_  Zweight_loaderZweight_sizerJ   rK   rV   "  s    "
zembedding.<locals>.fnr   )rr   rX   r1   strrt   r   r   r   r.   r   r   )weightr   Zpadding_idxZscale_grad_by_freqsparserV   rJ   r  rK   	embedding  s     r  c                    s   t dd  D s*tddd  D  tdd  D rDtddd t D }t|d	ksjtd
d gt  }t|t fdd|D  D ]$\}}| |krtd|||< q||fS )Nc                 s   s2   | ]*}|d k	r|  tjtjtjtjfkV  qd S rR   )rt   r]   r  r.  rz   r  r   rJ   rJ   rK   rW   3  s   z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S   s   g | ]}|d k	r|  qS rR   rt   r   rJ   rJ   rK   rQ   7  s      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s   s*   | ]"}|d k	r|  tjtjfkV  qd S rR   )rt   r]   rz   r  r   rJ   rJ   rK   rW   8  s     zFallback for bool indicesc                 S   s   g | ]\}}t |tr|qS rJ   r   r   rJ   rJ   rK   rQ   =  s     
 r   z"requires at least 1 non-None indexc                    s   g | ]} | qS rJ   rJ   r   r   rJ   rK   rQ   @  s     z.Fallback when indices is on a different device)	r   rr   r   rG   r   r   r   r   r   )r   r   Z
valid_idxsZnew_indicesr   rP   rJ   r  rK   check_and_broadcast_indices2  s"    
$
r  c              	      s   dt dd  D ]\}}	|	| dkrdq	fddtD 	tt d  d }
rx nd |
  |
d    f	dd}|fS )	NFr"   Tc                    s    g | ]\}}|d kr | qS rR   rJ   )rO   r   rC  r  rJ   rK   rQ   `  s      z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	      s  t | t kstt t ks(tt }g }d }rDdn|}d}td d D ]}||krp||7 }| d kr|t | k st|| |  |d7 }q\| }|d k	st| }|tj|| |||  | d q\|| |d  }d kr|S |S )Nr   r   r"   r   )r   rr   r   r   r=   r  )	rW  r  	new_indexfirst_tensor_indexZstart_offsetZnext_idxr   loaderrs  )	r   indexed_sizer   indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer]  rJ   rK   rV   m  s8    

z*index_output_size_and_inner_fn.<locals>.fn)r   r   r   )r  r   r  r  r  r  r]  r   previouscurrentr  rV   rJ   )
r   r  r   r  r  r  r  r  r]  r  rK   index_output_size_and_inner_fnJ  s"    


!r  c           
   
      s   t  ttfst|  }t |  \ }t|dks@tddd  D }t |d   }|   fddt	t D }d|krd|krt
dfddt	t D }t ||||||d\}}	tj|  |  |	|d	S )
Nr   z Must have at least one valid idxc                 S   s    g | ]}|d k	r|  nd qS rR   r   r   rJ   rJ   rK   rQ     s     zindex_impl.<locals>.<listcomp>c                    s    g | ]} | d k	r| qS rR   rJ   r   r   r  rJ   rK   rQ     s      z0index is out of bounds for dimension with size 0c                    s   g | ]} | qS rJ   rJ   r   r  rJ   rK   rQ     s     r  r   )rX   rY   rZ   rr   r   r  r   r   r   r   
IndexErrorr  r.   r   rt   )
rP   r   r   r]  r  r  r  r  r  r   rJ   r  rK   
index_impl  s6    
r  c                 C   sH   zt | |ddW S  tk
rB   |   ttjjdd| | Y S X d S )NTr  Fr  )r  rG   r   r  r  r   r5  rP   r   rJ   rJ   rK   r     s     r   c                 C   s   t | |ddS NFr  )r  r  rJ   rJ   rK   _unsafe_index  s    r  c                 C   s   t t| |||S rR   )
index_put_r  rP   r   r   
accumulaterJ   rJ   rK   	index_put  s    r  c                 C   s   t t| |||ddS r  )index_put_impl_r  r  rJ   rJ   rK   _unsafe_index_put  s    r  c                 C   sB   |  |   krt||   }|r,t| |}t| t|d || S r   )r   r   r\   r  r2  )r  r   r   r  rJ   rJ   rK   index_put_as_masked_fill  s
    
r  c                 C   sl   t  }t|rP|s|rP|s dnd}tjjjdd  }rH| d| }|tj_t	
tjjj| ||| | S )Nzindex put with accumulate.zdeterministic index put.stack_trace Found from : 
 )r]   $are_deterministic_algorithms_enabledr+   r>   r   r   r  r@  disable_cudagraphs_reasonr%   ZIndexPutFallbackr   )r  r   r   r  deterministicrI   r  rJ   rJ   rK   index_put_fallback  s    r  c                 C   s   t | |||ddS )NTr  r  r  r   r   r  rJ   rJ   rK   r    s    r  c                 C   s   t | |||ddS r  r  r  rJ   rJ   rK   _unsafe_index_put_  s    r  c              
      s^  |  dkrrt|dkrr|d  tjtjhkrr|d }tt| t|  D ]}t|d}qRt	| |g||S t
 rt| |||S |D ]2}|d k	r| tjtjhkrt| |||  S q|   t }|rt|  r|dkrt| dg} t| |||} |dkrt| g } | S t||  }zt||  \}}	W n$ tk
rf   t| ||| Y S X dd |D }
t| tst|   |dkrt| dg} t||	d   } fddtt|D }t ||	||
|d |d\}}t||}tj|  |  | |||rdnd d	}td t| |}tj ||_!|dkrZt| g } | S )
Nr"   r   r   c                 S   s    g | ]}|d k	r|  nd qS rR   r   r   rJ   rJ   rK   rQ   '  s     z#index_put_impl_.<locals>.<listcomp>c                    s   g | ]} | qS rJ   rJ   r   r  rJ   rK   rQ   2  s     r  
atomic_addr   ro   r   r   output_indexerZscatter_mode)"	get_numelr   rt   r]   rz   r  r   r   rT  r  r  r  r9   r  r   r  r   rG   rX   r1   rr   r   rY   r  r   r%   Scatterr   r  MutationLayoutSHOULDREMOVEr>   r   r  r#  )r  r   r   r  r   r  r   r   Zx_ndimr  r  r  r  Zexpected_vals_sizer   scatterr  rJ   r  rK   r    s    



 




r  )r}   r   c           	         s   dd | ||fD \}}}|   |  ks.t| tjtjhksFt| |  | | |  fdd}tj	| 
 |  ||  d}t||   S )Nc                 s   s   | ]}t |d V  qdS )r   N)r  rN   rJ   rJ   rK   rW   Z  s     z,masked_scatter_with_index.<locals>.<genexpr>c                    sH    }t  tj} fdd}t ||d}t |||S )Nc                     s    } t | }|gS rR   )r=   r  )Zsource_idx_valr   )rW  source_idx_loadersource_loadersource_numelrJ   rK   load_source_vali  s    zDmasked_scatter_with_index.<locals>.inner_fn.<locals>.load_source_valr   )r=   r   r]   rz   r  r2  )rW  Zself_valZmask_valr  Z
source_valZmask_loaderZself_loaderr  r  r   r  rK   r   e  s
    z+masked_scatter_with_index.<locals>.inner_fnr   )r   rr   rt   r]   rz   r  r   r  r.   r   r   r  )	r  r  Z
source_idxsourceZ	self_flatZ	mask_flatZsource_flatr   Zresult_flatrJ   r  rK   masked_scatter_with_indexV  s     r  c                 C   s$   t | }t||||}t|| |S rR   )r  rw  copy_)r  r  rs  rt  ru  r   Zoutput_viewrJ   rJ   rK   as_strided_scatterz  s    
r  r   c                 K   s   t t| |||f|S rR   )scatter_r  )rP   r   r   r  r   rJ   rJ   rK   r    s    r  r6  include_self)r  r   r6  r
  c             	   C   s^   t |t}t| || |r"| nt||r6| jnd|rZtj| ||||||d |S d S )Nznot implr	  )rX   r1   r<   rt   r   r   r%   ZScatterFallback)r  r  r   r   r  r6  r
  Zsrc_is_tensorrJ   rJ   rK   scatter_fallback  s(    

	r  r6  )r   r6  c                C   sr   |dkst |d krHttjtjjjj}t	|| ||||d}|d k	rH|S |dkrVd}n|dkrbd}t
| ||||S )N>   Nr\   multiplyr  r\   sumr  prod)rr   rS   r  r  r>   r   r   r   Z_overloadnamer  scatter_reduce_)r  r   r   r  r6  r  fallback_resultrJ   rJ   rK   r    s$         r  c                 C   s   t t| |||S rR   )scatter_add_r  rP   r   r   r  rJ   rJ   rK   scatter_add  s    r  c                 C   s   t | |||dS )Nr  )r  r  rJ   rJ   rK   r    s    r  c                 K   s   t t| ||||f|S rR   )r  r  )rP   r   r   r  reduction_typer   rJ   rJ   rK   scatter_reduce  s    r  )r
  )r   r
  c             	      s  |dkst ttj dkr,dtj ks4t dttjj |||d}|rV|S ttsdt dt	|
 ksxt t }|dkrtdgttrt dkrtdgt|trt| dkrt|dg}t    | ttr nd  fdd	}fd
d}	dd }
|stj 
 fdd| |d d}td t|}tj||_tj 
 |	| ||
|d}td t|}tj||_|dkrtg S )N>   r  Naminmeanamaxr  r"   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_r	  rp   r   c                    s@     }t|}t| }t| |dkr.dn|  | < |S r,  )r   r   rY   r=   r  )rW  rU  r  Zindirect_idx)r   r  r  rJ   rK   r    s     z'scatter_reduce_.<locals>.output_indexerc                    s    r| S t   S d S rR   r=   r  rt   r  )r  r  r  rJ   rK   rV     s    zscatter_reduce_.<locals>.fnc                 S   s    | dkrdS | d kst d S d S )Nr  r  r7  r  rJ   rJ   rK   backend_reduce_str  s    z+scatter_reduce_.<locals>.backend_reduce_strc                    s   t d  S r   r  r   )r  rJ   rK   r~    r  z!scatter_reduce_.<locals>.<lambda>r  )rr   r   r  r  ra   r  r  rX   r1   r  rt   r   r  re  r   r   r%   r  r   r  r  r>   r   r  r#  )r  r   r   r  r6  r
  r  r  r  rV   r  Zzero_outr  r  rJ   )r   r  r  r  r  rK   r    s    





r  )scales_xnexactc           
         s   |    |  |   d  |  d   }dd D t|ksRt|}dd t|D t|D ]\}}|d k	rrd| |< qr fddfdd}	tj| 	 | 
 |	||d	S )
Nc                 S   s   g | ]}t jj|qS rJ   )r>   r   r   r  r   rJ   rJ   rK   rQ   <  s     z&upsample_nearestnd.<locals>.<listcomp>c                 S   s   g | ]\}}|| qS rJ   rJ   )rO   r   orJ   rJ   rK   rQ   A  s     r  c                    s\   t | tj}  r(t | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr  )
r=   r  r]   r  r\   r  r   r   r.  r  )rP   r  rs  )r  rJ   rK   scale_fnF  s    z$upsample_nearestnd.<locals>.scale_fnc                    s>   |  d  }| d   }|fddt | D S )Nc                    s   g | ]\}}} |||qS rJ   rJ   )rO   r   r?  rs  )r"  rJ   rK   rQ   U  s     z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )rW  rP   r   )i_sizes
inv_scalesr  r"  r]  rJ   rK   rV   Q  s
    zupsample_nearestnd.<locals>.fnr   )realize_hintr   r   r   rr   r   r   r.   r   r   rt   )
rP   r  r  r  r  batchZo_sizesr   r  rV   rJ   )r  r#  r$  r  r"  r]  rK   upsample_nearestnd1  s&    r'  )r  c                 C   s   t | ||fddS )Nr"   r  r'  rP   r  r  rJ   rJ   rK   upsample_nearest1d`  s    r+  c                 C   s   t | ||fdddS )Nr"   Tr  r  r)  r*  rJ   rJ   rK   _upsample_nearest_exact1de  s    r-  )scales_hscales_wc                 C   s   t | |||fddS )Nr    r(  r)  rP   r  r.  r/  rJ   rJ   rK   upsample_nearest2dj  s    r1  c                 C   s   t | |||fdddS )Nr    Tr,  r)  r0  rJ   rJ   rK   _upsample_nearest_exact2dq  s    r2  )scales_dr.  r/  c                 C   s   t | ||||fddS )Nrd   r(  r)  rP   r  r3  r.  r/  rJ   rJ   rK   upsample_nearest3dx  s    r5  c                 C   s   t | ||||fdddS )Nrd   Tr,  r)  r4  rJ   rJ   rK   _upsample_nearest_exact3d  s        r6  c                    s   t  fdd|D S )Nc                 3   s   | ]}t | V  qd S rR   r  r   rn   rJ   rK   rW     s     z$_create_constants.<locals>.<genexpr>)rZ   )ro   r   rJ   rn   rK   r    s    r  c                    s   t d dd |  d  D  |  fddtD fddtD  fdd}tj|  |  |t| dS )	Nr    c                 S   s   g | ]}|d  qS )r"   rJ   rO   hrJ   rJ   rK   rQ     s     z._reflection_padnd_backward.<locals>.<listcomp>c                    s    g | ]}d  d |   qS r    r"   rJ   r   r   paddingrJ   rK   rQ     s     c                    s$   g | ]}d  d |  d  qS r9  rJ   r   r:  rJ   rK   rQ     s     c                    s  | d    |  d   fdddd 	fddt D 	fddt D }	fddt D }	
fd	dt D ttjfd
dt D }t|fddd}fdd}tjdd t D  D ]}|tdg krqg }g }t D ]}	||	 dkrH|	 }
|	 }nb||	 dkrr||	 }
|	 d	|	 f}n8||	 dkr||	 }
|	 |	 
|	  |	 d f}|	|
 |	| q$||||}q|S )Nc                    s    | S rR   rJ   rx   )r   grad_loaderrJ   rK   load_from_output  s    z@_reflection_padnd_backward.<locals>.fn.<locals>.load_from_outputc                 S   sP   | \}}}t |tj}t |tj}t |tj}t t ||t ||S rR   )r=   r  r]   r.  r  r  r  le)index_ranger   ZlbZubrJ   rJ   rK   index_range_condition  s
    
zE_reflection_padnd_backward.<locals>.fn.<locals>.index_range_conditionc                    s   g | ]}|  |  qS rJ   rJ   r   padding_leftxyzrJ   rK   rQ     s     z:_reflection_padnd_backward.<locals>.fn.<locals>.<listcomp>c                    s   g | ]} | |  qS rJ   rJ   r   rA  rJ   rK   rQ     s     c                    s(   g | ] }d  |  |  |  qS )r    rJ   r   )dhwrB  rC  rJ   rK   rQ     s     c                    s.   g | ]&} | d | |  |  fqS r  rJ   r   )centerrD  rB  padding_rightrJ   rK   rQ     s   c                    s   g | ]} | qS rJ   rJ   r   )r@  range_crJ   rK   rQ     s     c                      s    S rR   rJ   rJ   )rE  r=  rJ   rK   r~    r  z8_reflection_padnd_backward.<locals>.fn.<locals>.<lambda>r  c                    s|   t D ]2}|| d || d k }t|tr|r|   S qttjfdd|D }t| fddd}t| |S )Nr    r"   c                    s   g | ]} |qS rJ   rJ   )rO   r?  )r@  rJ   rK   rQ     s     zN_reflection_padnd_backward.<locals>.fn.<locals>.accumulate.<locals>.<listcomp>c                      s    S rR   rJ   rJ   )r=  r   rJ   rK   r~    r  zL_reflection_padnd_backward.<locals>.fn.<locals>.accumulate.<locals>.<lambda>r  )	r   rX   rz   r   r6  r=   r  r  r\   )gradr   index_rangesr   Zupper_less_than_lowerrH   g)r   r@  r=  )r   rK   r    s    
z:_reflection_padnd_backward.<locals>.fn.<locals>.accumulatec                 S   s   g | ]}d ddgqS )r   r   r"   rJ   )rO   r   rJ   rJ   rK   rQ     s     r   r   r"   )
r   r   r6  r=   r  r  r   productrZ   r   )rW  Zleft_reflectZright_reflectrH   rH  r  ZareaZoutsrI  r   r   r?  )rD  r   r<  rB  rF  )r   rE  r@  r=  rG  rC  rK   rV     sF     
"
z&_reflection_padnd_backward.<locals>.fnr   )	r   r   r   r   r.   r   r   rt   rY   )grad_outputrP   r;  rV   rJ   )rD  r   r<  r;  rB  rF  rK   _reflection_padnd_backward  s    O
rM  c                    s:   |   |   fdd}tj|  |  |dS )Nc                    sF   t | } t| tkst D ]}| d | |  | |< q | S r*  )rY   r   rr   )rW  r   r<  r7  r]  rJ   rK   r    s
    zrev.<locals>.loaderr   )r   r   r.   r   r   rt   )rP   r<  r  rJ   rN  rK   rev  s    rO  c              	      s\  t |d dksttdd |D r.t| S |  }tttt|d d d |dd d  t |t   g  D ] \}}t	j
j||f qxt|d  }g t |d  D ].\\}}}	|	 |t|	| |  qt |t |kstt|   fddfdd	}
|  tj|  |  |
|d
S )Nr    r   c                 s   s   | ]}|d kV  qdS ry  rJ   )rO   r1  rJ   rJ   rK   rW     s     z"constant_pad_nd.<locals>.<genexpr>r"   c                    s~   g }t  d  D ]>\}\}}}|dkr>|t|d |dkr|t|| qttj|}t| fddS )Nr   c                      s    S rR   rJ   rJ   )r   r]  rJ   rK   r~  -  r  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   r   range_mask_lowrange_mask_highr   r6  r=   r  r  )r   r  rW  rW  rX  r  )boundsr  
mask_sizesr  r]  r   rK   r  %  s    "zconstant_pad_nd.<locals>.maskc                    sZ   t | d  }t| d   D ]\}\}}|||  q"t|t| ksRt|S rR   )rY   r   r   r   rr   )r   r  rW  rW  rX  )bounds_precompr  r  rJ   rK   	offset_fn/  s
    z"constant_pad_nd.<locals>.offset_fnr   )r   rr   r   r  r   rY   r   r   r   r>   r   r   Zlookup_precomputed_sizeru   r   r   rt   r   r.   r   r   )rP   r;  r  r7  lr8  r  rW  rX  rs  rU  rJ   )rR  rT  r  r  rS  r  r]  rK   constant_pad_nd  s2    *

rW  r   rW  c                 C   s&   t t | tjt t|tjS rR   )r=   r  r  r]   r  ru   r   rX  rJ   rJ   rK   rP  ?  s    rP  r   rX  c                 C   s    t t | tjt |tjS rR   )r=   r  r  r]   r  rY  rJ   rJ   rK   rQ  F  s    rQ  r   rX  rW  c                 C   s   t t| |t| |S rR   )r=   r  rP  rQ  rZ  rJ   rJ   rK   
range_maskM  s    r[  r  c                    sF       d   p&dg   fdd}|S )Nr   c                    s|   | d   |  d   t tj fddtD }rbt| fddS t| 	fddS )Nc                    s.   g | ]&}t |  | |  |  qS rJ   )r[  r   )r8  ih	padding_hrJ   rK   rQ   a  s     z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                      s   t  dS )Nr  )constant_boundary_conditionrJ   )r   r\  pad_fill_valueprefixrP   rJ   rK   r~  f  s   z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                      s    S rR   rJ   rJ   )r\  r`  r]  rJ   rK   r~  l  r  )r   r6  r=   r  r   r  )r   r  r   r  r8  r_  r;  r]  rP   r]  )r\  r`  rK   r   [  s    z)constant_boundary_condition.<locals>.loadr   r   )rP   r  r;  r_  r   r   rJ   ra  rK   r^  T  s
    r^  c                 C   s   t | d||   || d  || d  || }|rt | d||   || d  d|| d   || }tjj|d ||  |  ||  dkr|d8 }tjjd|||  |  ||   tjj|| dkrtjj|| d}n|}||fS )Nr    r"   r   F)r   r>   r   r   rQ  r  r   )rP   r   kernel_sizert  r;  	ceil_modeZx_outZx_altrJ   rJ   rK   pooling_sizer  s"    & * *$re  c                 C   s4   t | d} | d | d  }|dkp2tdd |D S )Nr    r   r"      c                 s   s   | ]}|d kV  qdS r4  rJ   rO   r>  rJ   rJ   rK   rW     s     z:should_fallback_max_pool2d_with_indices.<locals>.<genexpr>)r:   r   )rc  dilationwindow_sizerJ   rJ   rK   'should_fallback_max_pool2d_with_indices  s    
rj  assert_fallbackc                C   s   |dkrddg}|dkr ddg}|s(|}t |d}t |d}t |d}t |d}t| ts^tt|dksntt|dks~tt|dkstt|dkstt|  dkstt||}|d k	r||kst|||||fS )Nr   r"   r    rx  )r:   rX   r1   rr   r   r   rj  )rP   rc  rt  r;  rh  rl  Zuse_fallbackrJ   rJ   rK   max_pool2d_checks  s(    




rm  c                    s  t |  |dd\ }}|   |  ^ }}}	t|d |\}
}t|	d |\}}t||
|g }d sd s|s|rt| tdddn|   fdd	}tj	| 
 |  tj|dd
|d}tj	| 
 tjtj|dd
|d}||fS )NFrk  r   r"   -infr    r  c                    s   | ^ }}}d }d }t t d t d D ]\}}|d  | d  }	|d  | d  }
||	|
f}|rt| d  | tj}|d kr|}ntt||||}|d kr|}q0t	||}q0|r|S |S d S r,  )
r   rK  r   r=   r  r]   r  r2  gtr  )rW  return_indexr`  bhbwmaxvalmaxindexh_incw_incr\  iwrC  r   rc  r;  rt  r]  rJ   rK   rV     s*    
 
z/_low_memory_max_pool2d_with_offsets.<locals>.fnrp  r   T)rm  r%  r   re  rY   r^  r   r   r.   r   r   rt   r   r   r]   r  )rP   rc  rt  r;  rh  rd  r   r&  r8  wh_out
ceil_mode1w_out
ceil_mode2r_  rV   r   offsetsrJ   rx  rK   #_low_memory_max_pool2d_with_offsets  s<    
     r  c                    sH   |   fdd  fdd}tj|  tj||  d}|S )Nc           	         sf   t  tj}t |d  d  tj}t |d  d  tj}||  }|| }|| | S r,  )r=   r  r]   r  )	ru  rv  rq  rr  w_inZhbaseZwbaser\  rw  )input_widthr;  rt  rJ   rK   increments_to_index  s    zF_low_memory_max_pool2d_offsets_to_indices.<locals>.increments_to_indexc                    sL   | ^ }}}|||f}t tj}|| }|||  } ||||S rR   )r=   r  r]   r.  )rW  r`  rq  rr  r  Zkw_constru  rv  )r  kernel_widthoffsets_loaderrJ   rK   offsets_to_indices  s    zE_low_memory_max_pool2d_offsets_to_indices.<locals>.offsets_to_indicesr   )r   r.   r   r   r]   r  r   )r  r  r  rt  r;  r  r   rJ   )r  r  r  r  r;  rt  rK   )_low_memory_max_pool2d_offsets_to_indices  s    r  r  c                    s  dkrddg|dkr ddg}s(t |ts6ttdksFttdksVttdksftt|dksvtt| dkst|   z|  }W n tk
r   d }Y nX t |trt |jjt	r|jj}	t
jd t
j|	 |	 |	 d|	d}
|
  |
 }n(z| }W n tk
r<   d }Y nX |d k	rV|d dkpj|d k	oj|d dk}tdd |D rt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkrBt| ||||S |  	
fdd}t	j|  |  ||d}|rt
j|S |S d S )Nr   r"   r    rx  )r   ro   rs  )r#  r  r   c                 s   s   | ]}|d kV  qdS r4  rJ   rg  rJ   rJ   rK   rW   J  s     z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3   s:   | ]2}t |d   t d | d   d    dV  qdS r   r"   Nmaxr7  rc  rt  rJ   rK   rW   W  s   c                 3   s:   | ]2}t |d   t d| d   d    d V  qdS r"   r   Nr  rO   rz  r  rJ   rK   rW   [  s   rf  c                    sZ  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}|t j
t |t |t dtjd ddt j
t |t |t dtjd ddf}|} |}t ||}|	d krt ||t dtj}	n:t t t ||t |||}t |t 	|	||	}	q4q&|	d k	sVt|	S )Nr   r"   Fr  r   r  )r=   r  r]   r.  r   r  r  r  r   r\   r  r  r  r2  r  r  r  rr   )rW  r`  r8  rz  Z
index_testphstartpwstartphendpwendgradientph_pw_phpwZ
grad_indexZindex_actualZ	grad_partr   r  r<  h_window_sizer  Zindices_sizerc  r;  pooled_heightpooled_widthrt  w_window_sizewidthrJ   rK   rV   j  sl      
  

z,max_pool2d_with_indices_backward.<locals>.fnr   )rX   r1   rr   r   r   r%  rz  r  r   r.   r%   r  r+  r   rt   Zdecide_layoutr   )fallback_max_pool2d_with_indices_backwardr   rY   r  r   r   rH  r  )rL  rP   rc  rt  r;  rh  rd  r   Z	gO_strider   Zx_bufferZx_strideZis_channels_lastr&  heightr   r_  ri  rV   r   rJ   r  rK    max_pool2d_with_indices_backward  s    
	

       
        ;r  r  c                    s*   |   ^ }}}|   fdd}|S )Nc              
      s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                      s      fS rR   rJ   rJ   )h_start_indexr\  rw  r`  w_start_indexr]  rJ   rK   r~    r  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)r=   r  r  r  r]   r  r  )r`  Z
incrementsZstart_indicesZend_indicesh_end_indexw_end_indexr  pad_valr]  )r  r\  rw  r`  r  rK   r     s$    z!pad_adaptive_loader.<locals>.loadrb  )rP   r  r   r8  rz  r   rJ   r  rK   pad_adaptive_loader  s    r  c                    s(   |\|\  fdd}|S )Nc                    s   | ^ }}}|} |}|}|}d }	t td td D ]<\}
}|||
|g||g||g}|	d kr||}	qLt||	}	qL|	S r,  r   rK  r   r=   r\   )rW  r  r`  rq  rr  r  r  r  r  totalr\  rw  rC  h_end_index_fnh_start_index_fnkernel_maxesw_end_index_fnw_start_index_fnrJ   rK   fn_sum  s"    $z)_adaptive_pooling_idx_sum.<locals>.fn_sumrJ   )r  Zstart_index_fnsZend_index_fnsr  rJ   r  rK   _adaptive_pooling_idx_sum  s    r  c                    s  t tstt|dkst   ^ }}}tjj	|}tjj	|}|\}}||krr||krrt
S |dks|dkr|||f}t|  dS || dkr|| dkr|| || g}t|S t|| d |}	t|| d |}
t|||g } }dd }dd }tj|||d	}tj|||d	}tj|||d	}tj|||d	}|	|
 }|d
kr~t|S t|	|
g||g||g tt fdd}tj |||d}|S )Nr    r   rZ  r"   c                 S   s   t | | |S rR   r   r   out_diminp_dimrJ   rJ   rK   start_index  s    z)_adaptive_avg_pool2d.<locals>.start_indexc                 S   s   t | d | | d |S r*  r  r  rJ   rJ   rK   	end_index  s    z'_adaptive_avg_pool2d.<locals>.end_indexr  r  rf  c                    s   t  | t | S rR   )r=   truedivr  r  r  Zones_loaderrP   rJ   rK   rV   (  s     z _adaptive_avg_pool2d.<locals>.fnr   )rX   r1   rr   r   r%  r   r>   r   r   r  r  r^  rt   r   
avg_pool2dr4   rY   r   r   fallback_adaptive_avg_pool2dr  r  	ones_liker.   r   )rP   r  r&  h_inr  r{  r}  o_sizerc  h_kernel_maxw_kernel_maxr_  ro   r  r  r  r  r  r  ri  rV   rvrJ   r  rK   _adaptive_avg_pool2d  sT    


r  c                    s~   |\}|\}}dd }dd }	t j|||dt j|	||d t j||dt j|	|d fdd}
|
S )Nc                 S   s   t | | |S rR   r  r  rJ   rJ   rK   r  =  s    z._adaptive_pooling_idx_max.<locals>.start_indexc                 S   s   t | d | | d |S r*  r  r  rJ   rJ   rK   r  @  s    z,_adaptive_pooling_idx_max.<locals>.end_indexr  c                    s   | ^ }}}|} |}|}|}d }d }	t td td D ]\}
}||
|g||g||g}t||
  | | tj}r|	d kr|}	ntt||||	}	|d kr|}qPt	||}qPr|	S |S d S r,  )
r   rK  r   r=   r  r]   r  r2  ro  r  )rW  r`  rq  rr  r  r  r  r  rs  rt  r\  rw  rC  r   r  r  r  r  rp  r  r  r  rJ   rK   fn_maxH  s8    $ z)_adaptive_pooling_idx_max.<locals>.fn_max)r   r   )r  in_sizes	out_sizesrp  r  r  r{  r}  r  r  r  rJ   r  rK   _adaptive_pooling_idx_max7  s    "r  c              	   C   s  t | tstt|dkst|   |  ^ }}}tjj	|}tjj	|}|\}}|dksj|dkr|||f}t
||  |  dt
|tj|  dfS || dkr(|| dkr(|| || g}t|ddgdrt| |S t| ||ddgddgdd\}	}
t|
|d ||ddgd}|	|fS t|| d |}t|| d |}t|||g }|  }|| }|d	krt| |S t||g||g||gdt| td
d}t||g||g||gdt| td
d}tj|  |||d}tj|  tj||d}||fS )Nr    r   rZ  r"   )rh  F)rt  r;  rh  rd  )r;  rf  rn  )r  r  r  rp  r  Tr   )rX   r1   rr   r   r%  r   r>   r   r   r  r^  rt   r   r]   r  rj  max_pool2d_with_indicesr  r  r4   rY   fallback_adaptive_max_pool2dr  r  r   r.   r   )rP   r  r&  r  r  r{  r}  r  rc  rV  r  r   r  r  r_  ro   ri  Zinner_func_max_valZinner_func_max_idxr  rirJ   rJ   rK   adaptive_max_pool2dr  s      

    

r  c                    sN      t  d    fdd}|S )Nr"   c                    s   | f}t | }t   }t || | t ||  }t |tj}t |t d tj}t ||t  tjS r*  )	r=   r  rt   rL  r   r]   r  r  r2  )r`  r   sampleZi_exprZ
alpha_exprZseq_ir  r   r   in_sz	kernel_szout_szsamplesZsamples_loaderrJ   rK   r     s    z)_fractional_pooling_offsets.<locals>.load)r   r   )r  r  r  r  r   r   rJ   r  rK   _fractional_pooling_offsets  s    r  c                    s   |    |  ^ }\}}|\}}|| dkrBt| ||S tjt|g|d}	|	dd |	dd|   fdd}
t|||g }tj	| 
 |  tj|
dd	|d
}tj	| 
 tjtj|
dd	|d
}||fS )Nrf  )r  r  r  r  r   r  r"   c              	      s   | ^ }}}t  ||}t ||}d }d }ttd td D ]\}	}
|||	 ||
 f}|rt ||	  | |
 tj}|d kr|}n$t t 	t 
||t |||}|d kr|}qTt ||}qT|r|S |S d S r,  )r=   r  r   rK  r   r  r]   r  r2  or_ro  rI  r  )rW  rp  r`  rq  rr  r  r  rs  rt  r\  rw  rC  r   Z
h_index_fninp_hinp_wrc  Z
w_index_fnr]  rJ   rK   rV     s2    $   z!fractional_max_pool2d.<locals>.fnFry  r   T)r%  r   fallback_fractional_max_pool2dr   r   r  r   rY   r.   r   r   rt   r]   r  )rP   rc  r  Zrandom_samplesr&  kernel_hkernel_wr{  r}  Zgen_offsets_for_dimrV   r_  r  r  rJ   r  rK   fractional_max_pool2d  sH       

r  c                    s"      ^ }}}tjj|}tjj|}|^ }}}	|| dkrr||	 dkrrt|| ||	 gddS t||}
t||	}dd fdd}tj	||d}tj	|||d}tj	|	|d}tj	||	|d}t
|
|g||g||g  fd	d
}tj  |t|d}|S )Nr   r"   )divisor_overridec                 S   s   t | | t|S rR   )r   ru   r  r  rJ   rJ   rK   r  6  s    z0upsample_nearest2d_backward.<locals>.start_indexc                    s    | d ||S r*  rJ   r  )r  rJ   rK   r  9  s    z.upsample_nearest2d_backward.<locals>.end_indexr  c                    s    | t S rR   )r  r  )r  rP   rJ   rK   rV   H  s    z'upsample_nearest2d_backward.<locals>.fnr   )r%  r   r>   r   r   r  r  r4   r   r   r  r.   r   r   rt   rY   )rP   r  Z
input_sizer.  r/  r&  r  r  Zout_hZout_wr  r  r  r  r  r  r  rV   r  rJ   )r  r  rP   rK   upsample_nearest2d_backward$  s8    

r  rJ   c              
   C   s   t | ||||||ddS )Nr    r  _avg_poolndrP   rc  rt  r;  rd  count_include_padr  rJ   rJ   rK   r  ]  s    
r  c              
   C   s   t | ||||||ddS )Nrd   r  r  r  rJ   rJ   rK   
avg_pool3ds  s    
r  c                    s  	s	sdg t t 		t t| tsBttksRtt	ksbttksrtt|  d d fkst|   |  d   }|   d  t 	fddtD  \}	}
t	st	|
r
t
| dd
d}n|  
d	}t|t|	 }|  ttj}|d
krdkrVt}ndkrft}ntd || 	 |S 	fdd|r|r|rd| nd| 
fdd}n	
fdd}tj|  ||d}|S )Nr   r"   r    c              	      s"   g | ]}t | | qS rJ   )re  r   )rd  r8  rc  r;  rt  rJ   rK   rQ     s   z_avg_poolnd.<locals>.<listcomp>r  r  TFrf  rd   zUnknown dim: c                    s   | d   }|  d   d }t jfddtD  D ]F fddtD }|||}|d krv|}q<t||}q<|S )Nc                    s   g | ]}t  | qS rJ   )r   r   )rc  rJ   rK   rQ     s     z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                    s,   g | ]$} | |  |  |  qS rJ   rJ   r   )r   r\  r;  rt  rJ   rK   rQ     s     r  )rW  r  r`  r  r   rC  )r   rc  r;  rt  )r   r\  rK   r    s     z_avg_poolnd.<locals>.fn_sumr  c                    s   t | t  S rR   )r=   r   r  r  )ro   r  r  r]  rJ   rK   rV     s    z_avg_poolnd.<locals>.fnc           	         s   | d   }|  d  }g }t D ]x}|| |  |  }t||  | |  } st|d}t|| }t|| tj}|| q(t	
tj|}t| |S r   )r   ru   ZMinMaxr=   r  r]   r.  r   r   r6  r   r  )	rW  r`  rq  Zdivide_factorsr   hstarthendfactordivide_factor)r  r   r  r8  rc  r;  rt  r]  rJ   rK   rV     s     r   )r:   rX   r1   rr   r   r   r%  r   r   r   r^  r   rY   rt   r   r6  operatorr   fallback_avg_pool2dfallback_avg_pool3d
ValueErrorr.   r   r   )rP   rc  rt  r;  rd  r  r  r   r&  r{  Z
ceil_modeshad_paddingr_  ri  fallbackrV   r  rJ   )rd  r  r   ro   r  r8  rc  r;  r  rt  r]  rK   r    sr    




 




r  c                    s  d ksdkst ds s,ddgt| ts:t t|tsHt tdksXt tdksht tdksxt t| dkst |   | ^ }td|\}	}
td|\}}|  d pd p|
p||  ^ }	
t| }|	 }t
fddtd d D t
fddtd d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   zdivisor must be not zeror    rx  r"   c                 3   s:   | ]2}t |d   t d | d   d    dV  qdS r  r  r7  r  rJ   rK   rW   (  s   z&avg_pool2d_backward.<locals>.<genexpr>c                 3   s:   | ]2}t |d   t d| d   d    d V  qdS r  r  r  r  rJ   rK   rW   ,  s   rf  c              	      sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r"   )
r=   r  r]   r.  r  r   r  r\   r  r  )r  r  Zstride_hZstride_wZpad_hZpad_wr  r  r  Zwstartr  Zwendr  )r  rc  r;  rt  r  rJ   rK   !compute_pool_size_without_padding?  s,    

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                    s\  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]6}	tD ]$}
t 	|t |	tj}t 	|t |
tj}d k	r`}n(sls~d d  }n
 ||}t 
|t jt |t |t dtjddt jt |t |t dtj	ddf|}t t ||t ||}|d kr,t ||t dtj}nt |t 	|||}qq|d k	sXt|S )Nr   r"   Fr  r  )r=   r  r   r]   r.  r  r  r  r   r\   r  r  r  r  r  r2  r  rr   )rW  r`  r8  rz  r  r  r  r  r  r  r  r  r  r  partr  )r  r  r  r<  r  r  rc  r;  r  r  rt  r  rJ   rK   rV   [  sv      

  


zavg_pool2d_backward.<locals>.fnr   )rr   rX   r1   r   r   r%  re  r   rY   rt   r  r   fallback_avg_pool2d_backwardr.   r   r   )rL  rP   rc  rt  r;  rd  r  r  r&  r{  r|  r}  r~  r   r_  ro   ri  rV   r  rJ   )r  r  r  r<  r  r  r  rc  r;  r  r  rt  r  r  rK   avg_pool2d_backward  s^    
"Ar  c                 C   s   |   }t|tr|g}n|s*tt|}t|dkrTt|dksPtd| g S t|}tt|D ]j}|| dk r||  t|rt|nd7  < d||   krt|k shn t|dkr|| dkshtqhtt|t|kstd|S )Nr   )rJ   r  r  zinvalid axis: r"   zreduction axis not unique)	r   rX   rp   r   r   rZ   rr   rY   r[   )rP   r  rs  r   rJ   rJ   rK   _validate_reduction_axis  s    
 <r  c          
         s   |d k	rt | |} |  tt| |}g }g g }g ttD ]>}||krj| ||  qD| ||  qD fdd}rt}	D ]}t	d|	|< qn|}	| 
  t|  |p|  |  ||	|dS )Nc                    s   t |t kstr>t  t ks,t fddD  t  t ksRtd gt  t |  }tt t|D ]\}}|||< q|S )Nc                    s   g | ]} | qS rJ   rJ   r   r   rJ   rK   rQ     s     z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   rr   r   chainr   )r   Zreduction_indexr  rW  varZinner_loaderkeepdimsZkept_idxZreduced_idxrs  r   rK   r    s     
z%_make_reduction_inner.<locals>.loaderr"   )r   	dst_dtyper  r   r   reduction_ranges)r   r   r[   r  r   r   r   rY   ru   r   r   r   r   rt   )
rP   r  r  ro   r   Z
kept_sizesZreduced_sizesr   r  r_  rJ   r  rK   _make_reduction_inner  s:    



r  )r  c                    s   dd d fdd}|S )NFrn   c                   sB   t | ||| d}tjf | d|}t|jjtr>|  |S )Nr  r  ro   r   )r  Z
input_node)r  r/   r   rX   r   r   )rP   r  r  ro   r   ry  r   r  rJ   rK   r     s     zmake_reduction.<locals>.inner)NFrJ   )r  r   r   rJ   r  rK   make_reduction  s    r  c                C   sJ   |d k	rt | |} |  }t| |}t|  |  f|  f|  |dS )N)r   dtypes	inner_fnsrs  r  )r   r   re  r   r   rt   r   )rP   r  ro   rs  rJ   rJ   rK   _make_scan_inner  s    

r  c                   s   |d k	rt | |} |   t| |}|  }|tjtjfkrHt | tj} t| ||}t	 fdd|D }t
||  |  }t|t| }t t|||S )Nc                 3   s   | ]} | V  qd S rR   rJ   r   rs  rJ   rK   rW     s     zmean.<locals>.<genexpr>)r   r   r  rt   r]   float16r  r   sum_r;   r%   r*   r   r)   r   rY   div)rP   r  keepdimro   Zoutput_dtype
sum_resultdenomrJ   r   rK   r  
  s    

r  c           
         s   |d krd}|    t| |}t| |dd}|r8|  tt| |}t|||}t fdd|D }|r|t	|| d}t
||  |  }t|t|  }t||}	|s|	fS |r|nt||}|	|fS )Nr"   T)r  c                 3   s   | ]} | V  qd S rR   rJ   r   r   rJ   rK   rW   (  s     z var_mean_sum_.<locals>.<genexpr>r   )r   r  r  r   squarer  r  r;   ru   r  r%   r*   rt   r   r)   r   rY   r  r@  )
rP   r  
correctionr  return_meanZx_meanZdiffsr  r  Zx_varrJ   r   rK   var_mean_sum_  s&    

r
  c                 C   sV   t | |}t| ||d d d}|d }t|d }t|tjoTt|tjk oTt|dkS )Nr  r   r  r"   )	r  r  r;   rX   ru   r   rp   r#   Zunroll_reductions_threshold)rP   r  r  r   r   Zreduction_numelrJ   rJ   rK   use_two_step_variance5  s    
    
r  c                   s    d krd t | ||d d d}|d}|d |d tjjf |fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|r|  ||fS |fS )Nr"   r  r   r  r  Zwelford_reduce)r  r  ro   c                 3   s   | ]} | V  qd S rR   rJ   r   r   rJ   rK   rW   [  s     z$var_mean_welford_.<locals>.<genexpr>c                 S   s4   t | tjr(| js(tt| tj|S t	| |S rR   )
rX   ru   rv   Z	is_numberr=   r   r  r]   r  r  r  rJ   rJ   rK   get_constant_or_index_expr]  s    z5var_mean_welford_.<locals>.get_constant_or_index_exprc                    s4    }}t d}| t |||  S r   )r=   r  r  )r   cNzero)r  ro   r  rnumelrJ   rK   r"  b  s    

z#var_mean_welford_.<locals>.scale_fn)r  rB  r%   ZWelfordReductionr   rt   r   r   r  r;   r   )rP   r  r  r  r	  r   r  r  m2r   r"  r  rJ   )r  ro   r  r  rs  rK   var_mean_welford_E  s>        



r  c                   sx   |    t }t| |dd} t| ||||d}t| ||drHtf |ntf |}t fdd|D }|st|d S |S )NFr
  )rP   r  r  r  r	  )r  r  c                 3   s   | ]}t | d dV  qdS )Fr
  Nr  rN   Z	out_dtyperJ   rK   rW     s     z#var_mean_helper_.<locals>.<genexpr>r   )rt   r   r   r   r  r
  r  rZ   )rP   r  r  r  r	  Zcompute_dtyper   r   rJ   r  rK   var_mean_helper_p  s     	r  )r  r  c                C   s   t | |||ddS )NFr  r  r  r	  r  rP   r  r  r  rJ   rJ   rK   var_  s        r  c                C   s   t | |||ddS )NTr  r  r  rJ   rJ   rK   var_mean  s        r  c                 C   st   |dk rt t| | |S |dkr0td|S |dkr<| S t | |d |}t||}|d dkrpt|| }|S )Nr   r"   r    )pow_recursiver=   r  r  r   )rP   r   ro   ry  rJ   rJ   rK   r    s    r  c                 C   s   t | |S rR   )r=   powr   r   rJ   rJ   rK   
pow_native  s    r  )r   c                    sd  t tr$tkr$t tS t tr>dkr>t S t trXdkrXt S tdd  fD }t|}t tod  k odk n  p|odk}|rވ   fdd	}t	j
    |  d
S t  tr dk rtdS  dkrt rtS |rZt  tr:t S t trPt S t S t S )Nr!  r"   c                 s   s"   | ]}t |tjr| V  qd S rR   )rX   r%   r1   rt   rN   rJ   rJ   rK   rW     s      zpow.<locals>.<genexpr>i    r   c                    s   t |   S rR   )r  rt   r  r   r   r  rJ   rK   rV     s    zpow.<locals>.fnr   r    )rX   r   rp   r  sqrtr  r   r   r   r.   r   r   rt   r   r   rG  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr  )r   r   ro   Zis_integer_powZembed_exponentrV   rJ   r  rK   r    s@    
"




r  c                 C   s   t | tr| j}n| }t |tr&|j}t |tjsftj|  |  |	 | 
 dj}t |tjsftt |tjr| s| st |jtjs|  |j|_| S tjj|||d | S )Nr   unsafe_alias)rX   r1   r   r%   r  r.   r   r   rt   r   r   rr   Zis_input_bufferZis_module_bufferZ	NopKernelr   r  Zrealize_into)changedrC  r&  Zchanged_datarJ   rJ   rK   r    s:    

  r  c                 C   s   t | t| |S rR   )r  rG  )rP   r  rJ   rJ   rK   fill_  s    r(  c                 C   s4   t ||  }t||  }t||  }t| |S rR   )r   r   r   rt   r   r   r  )r  r  r  rJ   rJ   rK   r    s    r  c                 C   s   t | |S rR   )r=   floordivr  rJ   rJ   rK   r)    s    r)  c                 C   s   t | |S rR   )r=   truncdivr  rJ   rJ   rK   r*    s    r*  c                 C   s   t | ot |}t| ot|}|dkrP|r4td|rBt| |S tt| |S |dkr|rdtd|rrt| |S tt| |S t| |S )NrL  z5floordiv operands can not be boolean at the same timerN  z5truncdiv operands can not be boolean at the same time)ry   r{   rr   r)  rL  r  r*  rN  )r   r   Zrounding_modeZboth_integerZboth_booleanrJ   rJ   rK   div_mode  s    r+  c                 C   s<   t | ot |}|rt| |S ttjj}t|| |S d S rR   )r{   logical_andr,   r  r   r"  r   )r   r   Z	both_boolrV   rJ   rJ   rK   r   )  s
    
r   r  c              
   C   s   t | tjrt| jS t | tjr.t|  S t | tjr>| S t | tjsNdS t	j
j|  }t|0 ttjdd | j|   }W 5 Q R X W 5 Q R X t |t	j
jjstt |jtjr|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNZallow_indexingT)rX   r%   Z
MutableBoxget_constant_valuer   rm  rn  r   ZLoopsr]   Z	_inductorZops_handlerZExtractConstantsHandlerr   r>   Zset_ops_handlerr   objectr+  r   Zinner_fn_argsvirtualizedZOpsValuerr   r   )rP   r  r   rJ   rJ   rK   r-  3  s&    
  "r-  c                 C   s|   t dd | |fD }|r$t| |S t| }d k	rf|jdkrRttd|j}n
d|j }t| |S dd }t|| |S )Nc                 s   s   | ]}t |pt|V  qd S rR   )r{   ry   rN   rJ   rJ   rK   rW   T  s     zdiv_prim.<locals>.<genexpr>r   infr  c                  W   s
   t j|  S rR   )r=   r  r   rJ   rJ   rK   rV   a  s    zdiv_prim.<locals>.fn)	r   r*  r-  r   mathcopysignr   r   r   )r   r   is_integralZdivisorr  rV   rJ   rJ   rK   div_primR  s    



r4  c                 C   s    t | |ftjd\} }t| |S r  )r   r   INT_TO_FLOATr4  r  rJ   rJ   rK   r  g  s
     
r  c                 C   s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S   s   t | |S rR   )r=   modr  rJ   rJ   rK   rV   y  s    zfmod.<locals>.fnc                 S   s   t | |S rR   )r=   fmodr  rJ   rJ   rK   rV   ~  s    )r{   ry   r   )r   r   r3  rV   rJ   rJ   rK   r7  s  s
    
r7  c                 C   s:   |   }t|st|r&t| t } dd }t|| S )Nc                 S   s
   t | S rR   )r=   rsqrtrx   rJ   rJ   rK   _rsqrt  s    zrsqrt.<locals>._rsqrt)rt   r   r   r   r]   r  r   )rP   ro   r9  rJ   rJ   rK   r8    s
    r8  c                C   sB   t |  st|  r&|d kr&tj}td|d}|| |||dS )Nr  r  rn   r   rt   r   r]   r  r  rP   r  r  ro   rV   rJ   rJ   rK   r    s    

r  c                 C   s   t |  st|  r&|d kr&tj}t|  dkr\|dksBt|pL|  }t| |ddS dd }t	| ||d}t
jjf |d|i\}|d krt| ||d	S |S )
Nr   r   r   Tr
  c                 S   s   | \}|\}t ||fS rR   )r=   r\   a_tupleb_tupler   r   rJ   rJ   rK   
combine_fn  s    zcumsum.<locals>.combine_fnr  ro   r@  r   ro   )r   rt   r   r]   r  r   r   rr   r   r  r%   Scanr   fallback_cumsumrP   r  ro   r@  r   ry  rJ   rJ   rK   cumsum  s"    

rF  c                 C   s   t |  st|  r&|d kr&tj}t|  dkr\|dksBt|pL|  }t| |ddS dd }t	| ||d}t
jjf |d|i\}|d krt| ||d	S |S )
Nr   r<  Tr
  c                 S   s   | \}|\}t ||fS rR   )r=   r   r=  rJ   rJ   rK   r@    s    zcumprod.<locals>.combine_fnrA  r@  rB  )r   rt   r   r]   r  r   r   rr   r   r  r%   rC  r   fallback_cumprodrE  rJ   rJ   rK   cumprod  s"    

rH  c                 C   sr   dd }|   }t|  dkr4|dks,tt| S t| ||d}tjjf |d|i\}|d krnt	| |dS |S )Nc              	   S   s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS rR   )r=   r  r  rE  r2  log1pexp)r>  r?  r   r   Zmin_vZmax_vr  rJ   rJ   rK   log_add_exp_helper  s    z(logcumsumexp.<locals>.log_add_exp_helperr   r<  rA  r@  r  )
rt   r   r   rr   r  r  r%   rC  r   fallback_logcumsumexp)rP   r   rK  ro   r   ry  rJ   rJ   rK   logcumsumexp  s    rM  c                 C   s   t |  dkr2|dkstt| t| tjdfS |  }tj	d|dd}|tj
krXdn|jrjt|jn
t|j}t| ||d}|tjf|d< |  d	d
 f|d< tjjf |d|i\}}|d krt| |dS ||fS )Nr   r<  rn   argmaxFro   Zarg_break_ties_leftrA  r  c                 S   s   dS NrindexrJ   r8  rJ   rJ   rK   r~    r  zcummax.<locals>.<lambda>r  r@  r  )r   r   rr   r  r  r]   r  rt   r%   get_reduction_combine_fnrz   r  r  minr  r  r   rC  r   fallback_cummax)rP   r  ro   r@  Z	min_valuer   r   r   rJ   rJ   rK   cummax  s,      

rU  c                 C   s   t |  dkr2|dkstt| t| tjdfS |  }tj	d|dd}|tj
krXdn|jrjt|jn
t|j}t| ||d}|tjf|d	< |  d
d f|d< tjjf |d|i\}}|d krt| |dS ||fS )Nr   r<  rn   argminFrO  TrA  r  c                 S   s   dS rP  rJ   r8  rJ   rJ   rK   r~    r  zcummin.<locals>.<lambda>r  r@  r  )r   r   rr   r  r  r]   r  rt   r%   rR  rz   r  r  r  r  r  r   rC  r   fallback_cummin)rP   r  ro   r@  Z	max_valuer   r   r   rJ   rJ   rK   cummin  s,      

rX  c                C   sB   t |  st|  r&|d kr&tj}td|d}|| |||dS )Nr  r  rn   r:  r;  rJ   rJ   rK   r  $  s    

r  c                 C   s   t | tj} td| ||dS )Nr   r  r  )r   r]   rz   r  rP   r   r  rJ   rJ   rK   
reduce_any/  s    r[  c                 C   s2   |d k	r$t | ||dt| ||dfS t | d |dS NrY  )reduce_amaxreduce_argmaxrZ  rJ   rJ   rK   
reduce_max5  s
    r_  c                 C   s2   |d k	r$t | ||dt| ||dfS t | d |dS r\  )reduce_aminreduce_argminrZ  rJ   rJ   rK   
reduce_min@  s
    rb  xor_sumr  rS  rN  r  rV  
logical_or)r   r   c                 C   s   t | |tj|dS )Nr#  r}   r   r%  r   r5  )r   r#  r   rJ   rJ   rK   register_pointwise_numericZ  s    rg  c                 C   s   t | tjddS )NT)r}   r$  rf  r   rJ   rJ   rK    register_pointwise_numeric_ldf64c  s
    ri  r0  logical_not)r   )r}   r   r   identity)pointwise_overrides_datac                 #   s~   t |  t|  jd }|d kr"d S  fdd}t|tjjrh| D ] }t||}| j||fV  qDn| j||fV  d S )Nc                    s    j d krt| S d S rR   )Ztritonr  rh  r  rJ   rK   make_triton_fallback  s    
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	rl  rS   r#  rX   r]   r^   r_   ra   r}   )nsr#  r   rm  Zolnamer"  rJ   r  rK   _get_pointwise_overrides  s    
ro  re  c                    s,   | t |< t|   fdd}t| | d S )Nc                     s>    | |}g }t | d |D ]\}}|t||dd q|S )Nr   Tr%  )r   r   r  )r   r   resultsZmut_resultsr   ry  outplace_oprJ   rK   rV     s
    
z$register_foreach_inplace.<locals>.fn)rE   rD   r\   r   )aten_opZoutplace_aten_oprr  rV   rJ   rq  rK   register_foreach_inplace  s    
rt  c                    s   t | d d fdd}|S )Nr|   c                     s*    | |}t || d  }t| d |S r   )r   rt   r  )r   r   ry  rq  rJ   rK   rV   7  s    
zregister_inplace.<locals>.fn)r   )rs  rr  rV   rJ   rq  rK   register_inplace6  s    
ru  c                 C   s   d S rR   rJ   )r   rS  r  rJ   rJ   rK   sym_constrain_range`  s    rv  c                 C   s&   t jjjd }t|tjst|jj	S rf  
r>   r   r   r  rX   r]   r  rr   r   r  r   r   rC  rJ   rJ   rK   sym_sizee  s    ry  c                 C   s&   t jjjd }t|tjst|jj	S rf  rw  rx  rJ   rJ   rK   
sym_stridev  s    rz  c                 C   s   |   S rR   )r  )r   rJ   rJ   rK   	sym_numel~  s    r{  c                 O   s   t dd S )NzHelpful for debuggingrF   )r  r   r   rJ   rJ   rK   foobar  s    r|  c                 C   s   |    t| S rR   )r   r  rx   rJ   rJ   rK   _realize  s    r}  c                 C   s   |    t| | | S rR   )r   r%   ZResizeStorageBytes)variabler_  rJ   rJ   rK   resize_storage_bytes_  s    r  c                 C   s"   |    |   tt| |S rR   )r   r1   r   r%   ZSetSourceTensorKernel)r  Zsource_tensorrJ   rJ   rK   set__source_tensor  s    r  c          	         sx  t | tstt |ttfs t|d kr.tj}|tjkrFtd| |tj	kr`t
|dks`t|tjkrzt
|dkszt|  |  }|  }t | jtjr| j | _t rtjjjrt|rtdqt|rt|jqdndtjjdrt |||dS t!| gd	g}|"  tj#$||}t%||||&  fd
d}t'j(|||t|d}|S )Nzunsupported memory format: re   rf   nanTr  r   rZ  r"   c                    sH   |  t  tj}t tj}t ||}t | fddS )Nc                      s
    gS rR   rJ   rJ   )
flat_indexflat_loaderrJ   rK   r~    r  z*resize.<locals>.inner_fn.<locals>.<lambda>)r=   r  r]   r  r  r  )rW  Zflat_index_exprlimitr  r  Z	old_numelZout_indexerZuninitalized_val)r  rK   r     s
    zresize.<locals>.inner_fnr   ))rX   r1   rr   rY   rZ   r]   Zcontiguous_formatZpreserve_formatr!  Zchannels_lastr   Zchannels_last_3dr  rt   r   r   r%   rm  rn  r  utilsr  Zfill_uninitialized_memoryr   r   r   r  r  r>   r   r   Zstatically_known_equalsr  rw  r   r+  Z stride_ordered_for_memory_formatrq  r-  r.   r   )	rP   rs  r  ro   r   Zx_flatZ
out_strider   r   rJ   r  rK   resize  sZ    



	   r  )auto_functionalizedc                 C   s<   ddl m} ||}tj| |||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridZkernel_argsc                 S   s    i | ]\}}t |tr||qS rJ   r   )rO   rv  rC  rJ   rJ   rK   rl    s     
  z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr  Zget_constant_argsr%   ZUserDefinedTritonKernelr   )r  constant_args_idxr  r   r  Zconstant_argsrJ   rJ   rK   triton_kernel_wrap_  s    
r  c           
      C   s   i }|  D ]r\}}t|tjrv|j}d}	t|tjrPt|tjsHd}	qP|j}q*|	rfttj|}||krvt	|}|||< qt
| |||dS )NFT)r  r  r  r   )r   rX   r%   r1   r   rm  rr  rH  r  r  r  )
r  r  r  r   Ztensors_to_cloneru  r#  r   rP   Zhas_non_rv_viewsrJ   rJ   rK   triton_kernel_wrap  s*    
r  c                 C   sh   t | sttt |rFd}tjjjdd  }r>| d| }|tj_t	j
| |||}tttj|S )Nz"control flow operator: torch.cond.r  r  )r+   r   r   r>   r   r   r  r@  r  r%   ZConditionalr   rY   r1   )predZtrue_fnZfalse_fnZoperandsrI   r  ry  rJ   rJ   rK   rH     s    rH   c                 C   sd   t tt|| rBd}tjjjdd  }r:| d| }|tj_t	j
| |||}tttj|S )Nz(control flow operator: torch.while_loop.r  r  )r   r   r+   r>   r   r   r  r@  r  r%   Z	WhileLoopr   rY   r1   )Zcond_fnZbody_fnZcarried_inputsZadditional_inputsrI   r  ry  rJ   rJ   rK   
while_loop   s    r  )r@  r   c                    s   ddl m m}  fddt||D }|| |fdd}t|d |d d}td	d
 |D |d< tdd
 |D |d< tjj	f |d|i}|d d krt
d|S )Nr"   )InputDescriptorlower_pointwise_subgraphc                    s    g | ]} |  | d qS )rZ  )rt   r   rN   )r  rJ   rK   rQ   0  s   z$associative_scan.<locals>.<listcomp>c                    s    t | t | S rR   )r  r  )lhsrhs)lowered_combine_fnrJ   rK   wrapped_combine_fn6  s    z,associative_scan.<locals>.wrapped_combine_fnr   rA  c                 s   s   | ]}|  V  qd S rR   r  rN   rJ   rJ   rK   rW   =  s     z#associative_scan.<locals>.<genexpr>r  c                 s   s   | ]}|  V  qd S rR   r   rN   rJ   rJ   rK   rW   >  s     r  r@  z/Unable to generate code for associative_scan op)Zsubgraph_loweringr  r  r   r  r  rZ   r%   rC  r   r!  )r@  r   r   r  Zsubgraph_inputsr  r   ry  rJ   )r  r  rK   associative_scan,  s    


r  c                 C   s   d S rR   rJ   )tokensrJ   rJ   rK   _sink_tokensE  s    r  c                 O   s   t jj|f||}ddlm} ||||}|d k	s8ttjj| }|d krR|fS t	
t jtj|}t|ttfsz||fS |f|S d S )Nr   )get_effect_key)r%   ZEffectfulKernelr   Ztorch._higher_order_ops.effectsr  rr   r>   r   Zeffectful_opsr  ra  ZMultiOutputr1   rX   rY   rZ   )tokenr   r   r   ry  r  Zeffect_typeZeffectful_kernelrJ   rJ   rK   with_effectsJ  s    r  c                 C   s"   t | } tjtjj| || | S rR   )r  r%   _CollectiveKernelcreate_inplace_c10d_functionalall_reduce_r  r   	reduce_op
group_namerJ   rJ   rK   _all_reducec  s       r  c                 C   s   t jtjj| || | S rR   )r%   r  r  r  r  r  r  rJ   rJ   rK   _all_reduce_k  s       r  c                 C   s(   dd | D } t jtjj| || | S )Nc                 S   s   g | ]}t |qS rJ   )r  r{  rJ   rJ   rK   rQ   t  s     z)_all_reduce_coalesced.<locals>.<listcomp>r%   r  r  r  all_reduce_coalesced_r  r   r  r  rJ   rJ   rK   _all_reduce_coalescedr  s    r  c                 C   s   t jtjj| || | S rR   r  r  rJ   rJ   rK   _all_reduce_coalesced_}  s    r  c                 C   s   t jt jtjj| ||S rR   )r%   r1   r   r  create_out_of_placer  all_gather_into_tensorr  )r   
group_sizer  rJ   rJ   rK   _all_gather_into_tensor  s    r  c              	   C   s"   t tjjtjtjj	| ||S rR   )
r  r  r%   r1   r   r  r  r   all_gather_into_tensor_coalescedr  )r   r  r  rJ   rJ   rK   !_all_gather_into_tensor_coalesced  s    r  c              	   C   s    t jt jtjj| |||S rR   )r%   r1   r   r  r  r  reduce_scatter_tensorr  )r   r  r  r  rJ   rJ   rK   _reduce_scatter_tensor  s    r  c              
   C   s$   t tjjtjtjj	| |||S rR   )
r  r  r%   r1   r   r  r  r  reduce_scatter_tensor_coalescedr  )r   r  r  r  rJ   rJ   rK    _reduce_scatter_tensor_coalesced  s    r  c              	   C   s    t jt jtjj| |||S rR   )r%   r1   r   r  r  r  all_to_all_singler  )r   Zoutput_split_sizesZinput_split_sizesr  rJ   rJ   rK   _all_to_all_single  s    r  c                 C   s"   t | } tjtjj| || | S rR   )r  r%   r  r  r  
broadcast_r  r   r  r  rJ   rJ   rK   
_broadcast  s       r  c                 C   s   t jtjj| || | S rR   )r%   r  r  r  r  r  r  rJ   rJ   rK   _broadcast_  s       r  c                 C   s   t jtjj|  | S rR   )r%   Z_WaitKernelZcreate_waitr  wait_tensorr  )r   rJ   rJ   rK   _wait_tensor  s    r  c              	   C   s$   t jt jtjjjj	| |||S rR   )
r%   r1   r   r  r  r]   r=   _dtensorshard_dim_alltoallr  )r   Z
gather_dimZ	shard_dimr  rJ   rJ   rK   _shard_dim_alltoall  s    
r  zRInductor support for distributed collectives depends on building torch.distributedr  )quantized_lowerings)mkldnn_lowerings)NN)NNNNFN)F)F)F)N)N)N)r   r   rb  r"   T)N)N)N)r   )r   )r   r   r"   )r   r   r"   )r   r   r"   )r   T)r   )r   )r   )r   )T)N)N)T)NT)F)r   NNr"   )NN)F)r   FF)F)F)F)F)N)r    F)N)N)NN)NN)NNN)NNN)r   )Nr  N)F)r  )NNNN)rJ   r   FTN)rJ   r   FTN)N)N)NF)N)N)F)F)N)NF)NN)NN)N)N)NF)NF)NF)NF)NN)NN(  r   r   loggingr1  r  r  r  collectionsr   typingr   r   r   r   r   r   r	   r
   Zunittest.mockr   ru   r]   Z$torch.ao.quantization.fx._decomposedZtorch.fxZtorch.utils._pytreer  Z_pytreer  Z(torch._higher_order_ops.associative_scanr   r  r   r   Ztorch._prims_commonr   r   r   r   r   r   r   r   r   r   r   Ztorch.fx.experimental.sym_noder   r   Ztorch.utils._sympy.functionsr   r   r   r   Z_dynamo.utilsr!    r#   r$   r%   r&   decompositionr'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r/  r=   r>   	getLoggerr"  r  r?   r^   r   __annotations__r@   r[   rA   r  Ztr_c10dr  rB   rC   rD   r   rE   Zquantized_decomposedrL   rM   rc   rw  r  r  ZbmmZconvolutionZconvolution_backwardr  r  mmr1  r2  Z_int_mmr  r  Zint16r.  r  r  r  r   Z	complex32Z	complex64rz   r  rq   rp   rs   ry   r{   r   r   r   r   r   r   r   r   r   r   r   ro   r   r  r  r  r  r  r   r   Z
device_putr!  r%  r/  r1  r2  r   aliasdetachZdetach_ZliftZview_ofr8  r~   r9  r@  rA  rD  rE  rI  rJ  rL  rM  r  rN  r   rX  rY  r`  Z_unsafe_viewZreshapera  slicerj  rx  rz  r  Zquantize_per_channelr  Zdequantize_per_channelr  Zquantize_per_tensorr   r  Zdequantize_per_tensorr  r%  r  r  r  r  r  r  r  r  r  r  r  rT  r  re  r  r  	lru_cacher  r  r  r  r  r	  r
  r  r#  r&  Zrngprimsr/  r0  r4  Z	bernoullir1  r6  r9  r<  r=  r'  rC  r?  rA  rF  rE  rD  randintZforce_stride_orderrK  r.  rM  rQ  rN  Zlookup_seedrR  randomr  rV  rZ  r_  rc  rd  r  rw  ZFALLBACK_ALLOW_LISTr  Z_adaptive_avg_pool3dZadaptive_max_pool3dZfractional_max_pool3dZmax_pool3d_with_indicesuniformZexponentialZ_pdist_forwardZsoft_margin_loss_backwardZsearchsortedZ_cdist_forwardZ_cdist_backwardZmax_unpool2dZmax_unpool3dZ
_trilinearZsegment_reduceZ_segment_reduce_backwardZhistcZ	histogramZbin_ctZ_histogramdd_bin_edgesZ_histogramdd_from_bin_ctsZaddbmmZaddmvZ_addmm_activationZ
_cudnn_rnnZ_cudnn_rnn_backwardZ_embedding_bagZ_embedding_bag_forward_onlyZ_embedding_bag_dense_backwardZ*_embedding_bag_per_sample_weights_backwardZ_fused_moving_avg_obs_fq_helperZ*_fused_moving_avg_obs_fq_helper_functionalZavg_pool3d_backwardZ max_pool3d_with_indices_backwardZ_adaptive_avg_pool2d_backwardZ_adaptive_avg_pool3d_backwardZadaptive_max_pool2d_backwardZadaptive_max_pool3d_backwardZfractional_max_pool2d_backwardZfractional_max_pool3d_backwardZreplication_pad1d_backwardZreplication_pad2d_backwardZupsample_linear1d_backwardZupsample_bicubic2d_backwardZupsample_trilinear3d_backwardZgrid_sampler_2d_backwardZ_pdist_backwardsortZstableZkthvalueZtopkrT  ZmedianZ	nanmedianZrandpermZresize_Z
resize_as_Z_linalg_detZlinalg_householder_productZlinalg_inv_exZlinalg_ldl_factor_exZlinalg_ldl_solveZ	linalg_luZlinalg_lu_factor_exZlinalg_lu_solveZlinalg_matrix_expZ	linalg_qrZ_linalg_slogdetZ_linalg_solve_exZlinalg_solve_triangularZ_linalg_svdZ	lu_unpackZormqrZ_linalg_check_errorsZlinalg_pinvZatol_rtol_tensorZ_linalg_eighZtriangular_solveZlinalg_cholesky_exZcholesky_inverseZcholesky_solveZgeqrfZ_fft_r2cZnonzerogcdZ_thnn_fused_lstm_cellZ_primsZ	rng_primsZrun_and_save_rng_stateZrun_with_rng_stateZmasked_scatterZmasked_scatter_backwardr  ZangleZ_efficientzerotensorZ(_sparse_coo_tensor_with_dims_and_tensorsZ	to_sparseZ
_to_sparser   r  Z'_scaled_dot_product_efficient_attentionZ0_scaled_dot_product_efficient_attention_backwardZ#_scaled_dot_product_flash_attentionZ,_scaled_dot_product_flash_attention_backwardZ+_scaled_dot_product_flash_attention_for_cpuZ4_scaled_dot_product_flash_attention_for_cpu_backwardZ_flash_attention_forwardZ_flash_attention_backwardZ_efficient_attention_forwardZ_efficient_attention_backwardZ
_scaled_mmZindex_reducer  r  r  r  r  r  r  r  Zscalar_tensorr  Z
LongTensorr  r  r  r  rG  r  r^  r  r3  r  r  Z
zeros_liker  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r'  r+  r-  r5  r6  r  Zreflection_pad1d_backwardZreflection_pad2d_backwardZreflection_pad3d_backwardrM  rO  rW  rv   rP  rQ  r[  r^  re  rj  rm  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r  r  r  r  ZTensor_Tensorr$  ZScalarr"  ZTensor_Scalarr#  r  r(  r  r)  r*  r  r+  r   r  r   r-  r4  Ztrue_divider5  r5  r7  r8  r  r  rF  rD  rH  rG  rM  rL  rU  rT  rX  rW  r  r   r[  r  r_  rS  rb  rc  r  r]  r  r`  rN  r^  rV  ra  r\   rg  ri  rJ  r!  expm1Zrelur  r   r  r  cossinabsZbitwise_andZbitwise_left_shiftZbitwise_notZ
bitwise_orZbitwise_right_shiftZbitwise_xorlgammaerfZspecial_erfrI  tantanhr,  rj  rd  logical_xorr  r  Z	clamp_minZ	clamp_maxnegr  	remaindersignZsignbitZ	_neg_viewr>  r  r  ro  r  necoshsinhacosacoshasinasinhatan2atanatanhr2  erfcZerfinvhypotlog10log2Z	nextafterZcodegen.commonrl  ro  r#  r   r}   r   Z_foreach_addZforeach_add_listZforeach_add_scalarZ_foreach_mulZforeach_mul_listZforeach_mul_scalarZ_foreach_subZ_foreach_negZ_foreach_absZ_foreach_powZScalarAndTensorZ_foreach_divZforeach_div_listZforeach_div_scalarZ_foreach_sqrtZ_foreach_maximumZ_foreach_minimumZ_foreach_clamp_minZ_foreach_clamp_maxZ_foreach_reciprocalZ_foreach_signZ_foreach_copyrt  Z_foreach_add_Z_foreach_mul_Z_foreach_div_ru  Zadd_Zbitwise_and_Zbitwise_left_shift_Zbitwise_not_Zbitwise_or_Zbitwise_right_shift_Zbitwise_xor_Zmul_Zdiv_ZTensor_modeZlogical_and_Zlogical_not_Zlogical_or_Zlogical_xor_Zsub_Zrelu_Zsigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__rv  ry  rz  r{  r   methodfuncr|  Z_inductor_testr   r}  Zinductorr  set_Zsource_Tensorr  r  Z*torch._higher_order_ops.auto_functionalizer  r  r  Zhigher_orderrH   r  ZSubgraphr  r  r  Z)torch.distributed._functional_collectivesr  Z
all_reducer  r  r  Zall_reduce_coalescedr  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  ImportErrorr:  r   r  Zregister_quantized_opsZregister_woq_mm_opsr  Zregister_onednn_fusion_opsrJ   rJ   rJ   rK   <module>   st   (44,
  
	%4
.     
=I
,
















1
C2 , $  - )s6
	


		
%/
&







	
	

$    .


0
@



C8



,



   
   G&

Z  !!("k /  
   
     
  


`1( &    A !

  

E6 

M 

E      0 
 
        s 
	 $1
+

 

 
/#
	$











  
	 
 




    

  
  
 
   
 
   
 
 


@
 


	


