U
    zâhÙ^  ã                %   @   sÖ  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlmZ	 d dl
mZ d dlZd dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	l
mZmZmZ d
dl m!Z!m"Z" d
dlm#Z#m$Z$m%Z% e &e'¡Z(ej)j*Z*ej)j+Z+ej)j,Z,ej)j-Z-ee*j.e*j/e*j0e*j1e*j2e*j3e*j4e*j5e*j6e*j7e*j8e*j9e*j:e*j;e*j<e*j=e*j>e*j?e*j@e*jAe*jBe*jCe*jDe*jEe*jFe*jGe*jHe*jIe*jJe*jKee*jLe*jMe*jNe*jOjPe,jQg$ƒZReƒ eR–ZSe*jTe*jUjVe*jWe*jXe*jYe*jZe*j[e*j\j]e*j^e*j_e*j`gZaeeSeaƒ dd„ Zbebe*jcjdgƒdd„ ƒZeebe*jfjdgƒdd„ ƒZgebe*jhjVgƒdddœdd„ƒZhebe*jigƒed~dd„ƒƒZiebe*jjgƒdd„ ƒZjebe*jkjVgƒdd„ ƒZkebe*jlgƒdd„ ƒZlebe*jmjngƒddd„ƒZoebe*jpgƒed d!„ ƒƒZpebe*jqgƒed€d"d#„ƒƒZqebe*jrgƒed$d%„ ƒƒZrebe*jsjVgƒdd&d'„ƒZsebe*jtgƒd(d)„ ƒZtebe*jugƒdd*œd+d,„ƒZuebe*jvgƒd-d.„ ƒZvebe*jwe*jxgƒd/d0„ ƒZwebe*jyjVgƒdd1œd2d3„ƒZyebe*jze+jzgƒd4d5„ ƒZzebe*j{e+j{gƒd6d7„ ƒZ{ebe*j|ƒd‚d9d:„ƒZ|ebe*j}ƒdƒd;d<„ƒZ}ebe*j~gƒd=d>„ ƒZ~ebe*jgƒd8d?œd@dA„ƒZebe*j€jVgƒdBdC„ ƒZebe*j€j‚gƒdDdE„ ƒZƒej]eej„ ej„dFœdGdH„Z…ebe*j†ƒddddIœdJdK„ƒZ†ebe*j‡ƒddddIœdLdM„ƒZ‡ebe*jˆƒdddd8d8ej‰dNœdOdP„ƒZˆebe*jŠjVƒddddIœdQdR„ƒZŠebe*jŠj‹ƒddddIœdSdT„ƒZŒebe*jjVƒdUdV„ ƒZebe,jQjVƒej]ej]ej]ej]dWœdXdY„ƒZQebej)j,jŽƒdZd[„ ƒZebe*jgƒed„ej]ej]e‘e‘e’ej]d\œd]d^„ƒƒZebe*j“j”ƒd…d_d`„ƒZ•ebe*j–j”ƒd†dadb„ƒZ—ebe*j˜j”ƒdcdd„ ƒZ™e*jšjV ›ejœjjž¡ebe*jšƒej]ej]ejej] ejej] ejej] e’eŸeŸdeœdfdg„ƒƒZše   d¡dhdi„ ƒZ¡djdk„ Z¢ebe*j£ƒdldm„ ƒZ£ebe-j¤j¥ƒej]e‘e‘eŸej‚dnœdodp„ƒZ¦ebe*j§ƒd‡dqdr„ƒZ§ebe*j¨ƒdˆdsdt„ƒZ¨ebe*jWjVƒedudv„ ƒƒZWebe*j©ƒdwdxœe‘eªe’dyœdzd{„ƒZ©ebe*j«ƒd‰d|d}„ƒZ«dS )Šé    N)ÚOptional)Úcore_aten_decompositionsÚget_decompositionsÚremove_decompositions)Ú_grid_sampler_2dÚpw_cast_for_opmath)Úextra_random_decomps)Úcounters)Ú	out_dtype)Úpad_listlike)Úelementwise_dtypesÚELEMENTWISE_TYPE_PROMOTION_KINDÚtype_to_dtypeé   )ÚconfigÚinductor_prims)Úis_gpuÚ,needs_fallback_due_to_atomic_add_limitationsÚuse_scatter_fallbackc                 C   s8   t | ƒr| gn| D ]}|tkrt d| ¡ qt | t¡S )Nzduplicate decomp: %s)ÚcallableÚdecompositionsÚlogÚwarningÚdecompÚregister_decomposition)ÚopsÚop© r   úO/var/www/html/venv/lib/python3.8/site-packages/torch/_inductor/decomposition.pyr   j   s    r   c                 C   s   d S ©Nr   ©ÚtensorÚmsgr   r   r   Úassert_async_msg_decomps   s    r#   c                 C   s   d S r   r   r    r   r   r   Ú"functional_assert_async_msg_decompy   s    r$   )ÚminÚmaxc                C   s   d S r   r   )Úsymbolr%   r&   r   r   r   Úsym_constrain_range_for_size~   s    r(   c                 C   s(   |d k	r|   |¡} |d k	r$|  |¡} | S r   )Ú	clamp_minÚ	clamp_max)Úxr%   r&   r   r   r   Úclampƒ   s
    

r,   c                 K   s6   |  d¡}|d kr2tt|ƒƒ|d< tj| |f|ŽS tS )NÚdtype)Úgetr   ÚtypeÚtorchÚfullÚNotImplemented)ÚsizeÚ
fill_valueÚkwargsr-   r   r   r   r1      s
    
r1   c                    sJ   dgt ˆ ƒ }t|ƒD ]\}}|||< qtj‡ fdd„|D ƒf|Ž |¡S )Nr   c                    s   g | ]}ˆ | ‘qS r   r   )Ú.0Úl©r3   r   r   Ú
<listcomp>Ÿ   s     z"empty_permuted.<locals>.<listcomp>)ÚlenÚ	enumerater0   ÚemptyÚpermute)r3   Zphysical_layoutr5   ÚpermÚpr7   r   r8   r   Úempty_permutedš   s    
r@   c                 C   st   |
d rt | jjƒstS t | dgttd|  ¡ ƒƒ ¡}t 	| |||||||||	|
d |
d dg¡\}}}|||fS )Né   r   r   F)
r   Údevicer/   r2   ÚatenÚsumÚlistÚrangeÚdimÚconvolution_backward)Úgrad_outputÚinputÚweightZ
bias_sizesÚstrideÚpaddingÚdilationZ
transposedZoutput_paddingÚgroupsZoutput_maskZ	grad_biasZgrad_inpZgrad_weightÚ_r   r   r   rH   ¢   s"     õ
rH   c                 C   s   d| }t  | | ¡d|  S )Ng      $@g      ð?)rC   Úround)r+   ÚdecimalsZten_pow_decimalsr   r   r   Ú	round_decÃ   s    rS   c                 C   s¨   t jrB| jd dks"|jd dkrB|  d¡| d¡ jdd}|S | jjdkr¤|  d¡dkr¤| d¡dkr¤td d  d7  < t	j|  
d¡| 
d¡ ddd	 d¡S tS )
Nr   rA   éÿÿÿÿ©rG   ÚcpuÚinductorZdecompose_bmmT©rG   Úkeepdim)r   Úcoordinate_descent_tuningÚshapeÚ	unsqueezerD   rB   r/   r3   r	   r0   Úsqueezer2   )ÚselfZbatch2Úoutr   r   r   ÚbmmÉ   s      ÿþr`   c                 C   sØ   | j jdkrÔ| d¡dkrr| d¡dkrrtd d  d7  < tj| d¡| d¡ ddd d¡}|| ||   S | d¡dkrÔ| d¡d	krÔ| d¡d	krÔtd d  d7  < |j| jddd}|| ||   S t	S )
NrV   r   r   rT   rW   Zdecompose_addmmTrX   é   )
rB   r/   r3   r	   r0   rD   r]   r\   ÚTr2   )r^   Zmat1Zmat2ÚbetaÚalphar_   r   r   r   ÚaddmmÙ   s       ÿþ*re   c                    sX  ddl m}m} tjrNˆjd dks2ˆ jd dkrNˆ d¡ˆ  d¡ jddS ˆjj	dkrT|ˆ 
d¡dkƒrò|ˆ 
d¡dkƒrò|ˆ  
d¡dkƒròˆjˆ jkrò|t ˆ¡t ˆ ¡ dkƒròtd	 d
  d7  < t ‡ ‡fdd„tˆ 
d¡ƒD ƒ¡S |ˆ 
d¡dkƒrT|ˆ  
d¡dkƒrTtd	 d
  d7  < tjˆ d¡ˆ  d¡ ddd d¡S tS )Nr   )Údefinitely_trueÚguard_size_obliviousr   rA   rU   rV   rT   é    rW   Zdecompose_mmc                    s    g | ]}ˆ|d d …f ˆ  ‘qS r   r   )r6   Úi©Úinput2r^   r   r   r9      s     zmm.<locals>.<listcomp>TrX   )Ú%torch.fx.experimental.symbolic_shapesrf   rg   r   rZ   r[   r\   rD   rB   r/   r3   r-   r0   Únumelr	   ÚcatrF   r]   r2   )r^   rk   rf   rg   r   rj   r   Úmmê   s:    ÿþý
üû$ÿ  ÿþro   c                    sp   ddl m‰  ‡ fdd„}tt|| ƒƒ}t|ƒdkr>|d  ¡ S dt|ƒ  k rZt| ƒk rln ntj ||¡S t	S )Nr   ©rg   c                    s    t | jƒdkpˆ | jd dkƒS )Nr   r   )r:   r[   )r+   rp   r   r   Únon_empty_tensor  s    zcat.<locals>.non_empty_tensorr   )
rl   rg   rE   Úfilterr:   ÚclonerC   rn   Údefaultr2   )ZtensorsrG   rq   Zfiltered_tensorsr   rp   r   rn     s     rn   c                 C   s~   |   ¡ r.t t | j¡tdƒt | j| j¡¡S t| t	j
d\}}tjtj|| jd}t | dk |d¡}t t | ¡tdƒ|¡S )NÚnan)Ztype_promotion_kind©r-   rB   r   g        )Ú
is_complexr0   ÚwhereÚisnanÚrealÚfloatÚatan2Úimagr   r   ZINT_TO_FLOATZscalar_tensorÚmathÚpirB   )r+   rP   r-   r   Úretr   r   r   Úangle1  s    
  ÿþ
r   ©rd   c                C   sv   t  | ¡o|  ¡ }t  |¡o"| ¡ }|r,|s0tS |}|d k	rD|| }t  | j|j¡}|  | jj¡| |jj¡  |¡S r   )r0   Z	is_tensorrw   r2   Zpromote_typesr-   Úviewrz   )r+   Úyrd   Zx_is_complex_tensorZy_is_complex_tensorÚzZcomplex_typer   r   r   ÚaddE  s    r†   c                 C   s   |   ¡ rtdƒ‚| S )NzTODO: implement this)rw   ÚAssertionError©r^   r   r   r   Úconj_physicalR  s    r‰   c                 C   s   | S r   r   rˆ   r   r   r   ÚliftX  s    rŠ   )Ú	generatorc                C   s(   |d kst ‚tj| tjd| k  | j¡S )N)r-   )r‡   r0   Ú	rand_likeÚfloat32Útor-   )r^   r‹   r   r   r   Ú	bernoulli]  s    r   c                 C   s   t  t  |¡|| kB | |¡S r   ©r0   rx   ry   ©r^   Úotherr   r   r   Úfminc  s    r“   c                 C   s   t  t  |¡|| k B | |¡S r   r   r‘   r   r   r   Úfmaxh  s    r”   Fc                 C   s    | j tjkrtj| ||dS tS ©NrX   )r-   r0   ÚboolÚanyr2   ©r^   rG   rY   r   r   r   Úamaxm  s    r™   c                 C   s    | j tjkrtj| ||dS tS r•   )r-   r0   r–   Úallr2   r˜   r   r   r   Úamint  s    r›   c                 C   s   t  | |||¡ ¡ S r   )r0   Znarrowrs   )r^   rG   ÚstartÚlengthr   r   r   Únarrow_copy{  s    rž   ©Úimplicitc                C   s   t j| ||d ¡ S )NrŸ   )rC   Úexpandrs   )r^   r3   r    r   r   r   Úexpand_copy€  s    r¢   c                 C   s   t  | |¡ ¡ S r   )rC   rƒ   rs   )r^   r3   r   r   r   Úview_copy_default…  s    r£   c                 C   s   |   |¡ ¡ S r   )rŽ   rs   )r^   r-   r   r   r   Úview_copy_dtypeŠ  s    r¤   )r!   Úmemory_formatÚreturnc                 C   s$   |t jks|d krt | ¡S |S d S r   )r0   Úpreserve_formatÚutilsZsuggest_memory_format)r!   r¥   r   r   r   Úget_like_layout  s    
r©   )r-   rB   r¥   c                K   s8   t j|  ¡ •f|p| j|p| jdœ|—Žjt| |ƒdS ©Nrv   ©r¥   )r0   Zrandr3   r-   rB   rŽ   r©   ©r^   r-   rB   r¥   r5   r   r   r   rŒ   ™  s    ÿýüûrŒ   c                K   s8   t j|  ¡ •f|p| j|p| jdœ|—Žjt| |ƒdS rª   )r0   Zrandnr3   r-   rB   rŽ   r©   r¬   r   r   r   Ú
randn_like£  s    ÿýüûr­   )r-   ÚlayoutrB   Ú
pin_memoryÚrequires_gradr¥   c                C   s<   t j|  ¡ •||p| j|p| j|p&| j|djt| |ƒdS )N)r-   r®   rB   r°   r«   )r0   r1   r3   r-   r®   rB   rŽ   r©   )r^   r4   r-   r®   rB   r¯   r°   r¥   r   r   r   Ú	full_like­  s    úùr±   c                K   s>   t jjd||  ¡ •f|p| j|p$| jdœ|—Žjt| |ƒdS )Nr   rv   r«   ©rC   ÚrandintÚlowr3   r-   rB   rŽ   r©   )r^   Úhighr-   rB   r¥   r5   r   r   r   Úrandint_likeÃ  s    ýûúùr¶   c                K   s>   t jj|||  ¡ •f|p| j|p$| jdœ|—Žjt| |ƒdS rª   r²   )r^   r´   rµ   r-   rB   r¥   r5   r   r   r   Úrandint_like_lowÏ  s    ýûúùr·   c                 K   s   t jjd| |f|ŽS ©Nr   )rC   r³   r´   )rµ   r3   r5   r   r   r   r³   Ý  s    r³   )rJ   rK   Úbiasr¦   c                 C   s*   t jj |¡}t jj | ||| ¡ d ¡S r¸   )r0   r   Z
_quantizedZ$wrapped_fbgemm_pack_gemm_matrix_fp16Z!wrapped_fbgemm_linear_fp16_weightr3   )rJ   rK   r¹   Zpacked_weightr   r   r   Ú#linear_dynamic_fp16_unpacked_weightâ  s       
ÿrº   c                 C   sP   dd„ }|| ddd…f ƒ}|| ddd …f ƒ}| dd d…f   tj¡| | S )Nc                    sx   ‡ fdd„dD ƒ\}}}}t jdkrL||d>  |d>  |d>   tj¡d S |d> |d>  |d>  |  tj¡d S d S )	Nc                 3   s"   | ]}ˆ d |f   tj¡V  qdS ©.N)rŽ   r0   Zint32)r6   Ún©Úu8r   r   Ú	<genexpr>ï  s     zPq_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.<locals>.<genexpr>)r   r   rA   é   Úlittleé   ra   é   r»   )ÚsysÚ	byteorderrƒ   r0   r   )r¾   r+   r„   r…   Úwr   r½   r   Úbitcast_u8_to_f32î  s    
(z=q_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.iøÿÿÿéüÿÿÿ)rŽ   r0   r   )ÚpackedrÇ   ÚscalesÚoffsetsr   r   r   Ú"q_embedding_bag_byte_unpack_decompì  s    rÌ   )ÚaÚgridÚinterpolation_modeÚpadding_modeÚalign_cornersr¦   c                 C   s@   | j t  d¡ko$|dko$| jtjd }t| |||||d}|S )NrV   r   r«   )rÎ   rÏ   rÐ   rÑ   Ú_expand_grid)rB   r0   Zis_contiguousZcontiguous_formatÚdecomp_grid_sampler_2d)rÍ   rÎ   rÏ   rÐ   rÑ   rÒ   Úoutputr   r   r   Úgrid_sampler_2dú  s    ÿýúrÕ   c                 C   s   t jj| t j ||¡|dS ©Nr‚   )rC   Ú_foreach_addÚListÚ_foreach_mul©r^   Zleft_tensorsZright_tensorsZscalarr   r   r   Ú_foreach_addcmul_scalar  s
      ÿrÛ   c                 C   s   t jj| t j ||¡|dS rÖ   )rC   r×   rØ   Z_foreach_divrÚ   r   r   r   Ú_foreach_addcdiv_scalar   s
      ÿrÜ   c              	   C   s"   t j | t j t j || ¡|¡¡S r   )rC   r×   rØ   rÙ   ÚScalarZ_foreach_sub)Zstart_tensorsZend_tensorsrK   r   r   r   Ú_foreach_lerp_scalar'  s     ÿþrÞ   )rJ   rK   r¹   Úrunning_meanÚrunning_varÚtrainingÚexponential_average_factorÚepsilonc              
   C   sB   t  | |||||||¡\}}	}
|r,||	|
fS || d¡| d¡fS )N)r   )rC   Únative_batch_normZ	new_zeros)rJ   rK   r¹   rß   rà   rá   râ   rã   rÍ   ÚbÚcr   r   r   Úmiopen_batch_norm1  s     ø

ýrç   c                   C   s   t t–S r   )r   r   r   r   r   r   Úfast_random_decompsQ  s    rè   c                   C   s   t jr
tS tƒ S )z"decomps can change based on config)r   Zfallback_randomr   rè   r   r   r   r   Úselect_decomp_tableV  s    ré   c                 C   sF   t | jjƒrBt | |g¡\} }| d¡ d¡d }t | |||¡S t	S )NrT   r   r   )
r   rB   r/   rC   Zbroadcast_tensorsÚreshapeZcumsumr   Zmasked_scatter_with_indexr2   )r^   ÚmaskÚsourceZ
source_idxr   r   r   Úmasked_scatter]  s
    rí   )rJ   Ú	quant_minÚ	quant_maxÚepsr-   c           	      C   sv   t  | ¡\}}|| t|| ƒ }t  |t  |g¡¡}|t  || ¡ t j¡ }t  |||¡}| t j	¡| t j
¡fS r   )r0   Zaminmaxr{   r&   ÚTensorrQ   rŽ   Úintr,   Zfloat64Úint64)	rJ   rî   rï   rð   r-   Zmin_valZmax_valÚscaleZ
zero_pointr   r   r   Úchoose_qparams_tensorh  s    rõ   c                 C   s.   |   ¡ }t ||g| |j¡|¡}| | j¡S r   )Úflattenr0   Z	index_putrê   r[   )r^   Úindexrì   Ú
accumulateZ	flattenedr   r   r   Úputt  s      
 ÿrù   c                 C   s   t j| |||d}|  |¡S )N)rø   )rC   rù   Zcopy_)r^   r÷   rì   rø   r_   r   r   r   Úput_}  s    rú   c                 C   sD   | | }t j||dd}t | ||¡}| j|kr<| |¡}| ¡ S )NTrX   )r0   rD   r   Úfmar-   rŽ   Ú
contiguous)rI   rÔ   rG   Zinput_dtypeZnew_grad_outputZsum_new_gradZ
grad_inputr   r   r   Ú_softmax_backward_dataƒ  s    

rý   T©Úinclude_self)rG   Úreduction_typerÿ   c                C   sf  |dkržt | jƒsž| jjp | jj}t |¡}|rJ| }t | ¡ |||¡}	n2|  ||d¡}t | ¡ |||¡}	|	 	|	dk d¡}	| |||¡}|r–||	 S ||	 S t
tjj|| j|j|jjdƒrÀtS | j|d d …  ¡ | jd |…  ¡  }
| ¡ f| j|d d … | jd |… ˜}t| j| | jƒdtd| j| ƒ˜}| tj¡ |
¡ |¡ |¡}| j|||||dS )NZmeanr   r   Trþ   )r   )r   r-   Zis_floating_pointrw   r0   Z	ones_likeZ	index_addZ
index_fillZ
zeros_likeZmasked_fillr   rC   Zscatter_reduce_ÚtworB   r/   r2   r[   rm   rF   ÚndimrŽ   ró   Zrepeat_interleaverê   r=   Zscatter_reduce)r^   rG   r÷   Úsrcr   rÿ   Ztrue_divisionZonesr_   ÚcountsZrepeatsZindex_shaper>   Zscatter_indexr   r   r   Úindex_reduce”  sP    
ÿ
ú(($ÿþýÿûr  c           
      C   sÆ   |dkrddg}|dkr ddg}|d kr,|}t |dƒ}t |dƒ}t |dƒ}t |dƒ}|d |d  }tjj ||¡s†|t tj¡jkrŠtS t	 
| |||||¡\}}t	 ||d |  d¡||¡}	||	fS )Nr   r   rA   rT   )r   r0   Z	_inductorZloweringZ'should_fallback_max_pool2d_with_indicesZiinfoZint8r&   r2   ÚprimsZ#_low_memory_max_pool2d_with_offsetsZ)_low_memory_max_pool2d_offsets_to_indicesr3   )
r+   Zkernel_sizerL   rM   rN   Z	ceil_modeZwindow_sizeÚvalsrË   Úindicesr   r   r   Úmax_pool2d_with_indicesÃ  sF    



 ÿÿüúûr	  )NN)r   )r   r   )r   )NF)NF)r   r   F)r   )r   )F)F)Nr   r   F)¬Ú	functoolsÚloggingr~   rÄ   Útypingr   r0   Ztorch._decompÚ_decompr   Ztorch._prims_commonZ_prims_commonr¨   Z$torch.ao.quantization.fx._decomposedr   r   r   Ztorch._decomp.decompositionsr   rÓ   r   Z$torch._decomp.decompositions_for_rngr   Ztorch._dynamo.utilsr	   Z!torch._higher_order_ops.out_dtyper
   Ztorch._inductor.utilsr   r   r   r   Ú r   r   r   r   r   Ú	getLoggerÚ__name__r   r   rC   r  Z	quantizedZquantized_decomposedZ_adaptive_avg_pool2d_backwardZarangeZbitwise_and_Zbitwise_or_Z
clamp_min_ÚdistZ
empty_likeÚflipZgeluZhardtanhZindex_selectZlcmZ
leaky_reluZlinalg_vector_normZ_log_softmaxZ max_pool2d_with_indices_backwardZ_native_batch_norm_legitZ#_native_batch_norm_legit_functionalZ$_native_batch_norm_legit_no_trainingZ_batch_norm_with_updateZ"_batch_norm_with_update_functionalZ_batch_norm_no_updateZbatch_norm_backwardrä   Znative_group_normZnative_layer_normZnll_loss2d_backwardZ_softmaxZsin_Zsqrt_Z_to_copyZtril_indicesZtriu_indicesZupsample_bilinear2dZvecrº   Zinductor_decompositionsr   Z_unsafe_indexZ+_scaled_dot_product_flash_attention_for_cpurt   rý   r*   r)   ZgluZselect_scatterÚsplitrñ   r]   rD   ZunbindZdecomps_to_excluder   Z_assert_asyncr"   r#   Z_functional_assert_asyncr$   r(   r,   r1   r@   rH   rQ   rR   rS   r`   re   ro   rn   r   r†   r‰   rŠ   Zdetach_r   r“   r”   r™   r›   rž   r¢   Z	view_copyr£   r-   r¤   r¥   r©   rŒ   r­   r±   r§   r¶   Z	low_dtyper·   r³   Zembedding_bag_byte_unpackrÌ   rÕ   rò   r–   Z_foreach_addcmulrÝ   rÛ   Z_foreach_addcdivrÜ   Z_foreach_lerprÞ   rç   Zpy_implZ_CZDispatchKeyZAutogradr{   Ú	lru_cacherè   ré   rí   Zchoose_qparamsr!   rõ   rù   rú   r  Ústrr	  r   r   r   r   Ú<module>   sÄ  
Üÿ(
õ
	







 


#!












 þ
		÷

  ÿ


  þ	

   ûú



	


ø



    ÿ
ÿ  ÿ.       ÿ