U
    zhg;                     @   s   d dl mZmZ d dlZd dlm  mZ d dlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZmZmZmZmZmZ dd	lmZmZ dd
lmZmZmZ ddlmZ dd ZdS )    )ListOptionalN)mm_args   )ir)CppPackedGemmTemplate)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtype)autotune_select_algorithmExternKernelChoice)use_aten_gemm_kernelsuse_cpp_packed_gemm_templateuse_max_autotune)Vc                     sr  t jjrnt jjjt jjjt jjjt jjjt	j
jt jjjg} tt jjjtttddd}tt jjjjttttddd}tt jjjjttttddd}tt jjjtttd	d
d}tt jjjjttttddd}tt jjjtttddd}tt	j
jtttttttttt tttttttddd}tt jjjd dtttttddd}tt jjjjd dttttttddd}	tt jjjd dtttttddd}
tt jjjjd dtt jjjjd dttttttddd}t jjrdtt jjjd d!tjjd" | t jjj tt jjjd d#ttttt d$ fd%d&}t|  n d S )'N)xweightbiasc
           
      S   s$   t tj| |||||||||	
S N)r   creater   ZConvolutionUnary)
r   r   r   paddingstridedilationgroupsattrscalars	algorithm r!   R/var/www/html/venv/lib/python3.8/site-packages/torch/_inductor/mkldnn_lowerings.pyconvolution_unary"   s    z5register_onednn_fusion_ops.<locals>.convolution_unary)r   otherr   r   c                 S   s*   t tj| |||||||||	|
||S r   )r   r   r   ZConvolutionBinaryr   r$   r   r   r   r   r   r   binary_attrZbinary_alpha
unary_attrunary_scalarsZunary_algorithmr!   r!   r"   convolution_binary>   s"    z6register_onednn_fusion_ops.<locals>.convolution_binaryc                 S   s*   t tj| |||||||||	|
||S r   )r   r   r   ZConvolutionBinaryInplacer%   r!   r!   r"   convolution_binary_inplace`   s"    z>register_onednn_fusion_ops.<locals>.convolution_binary_inplace)r   wbc              
   S   s   t tj| |||||S r   )r   r   r   ZLinearUnary)r   r+   r,   r   r   r    r!   r!   r"   linear_unary   s    z0register_onednn_fusion_ops.<locals>.linear_unary)r   yr+   r,   c              	   S   s   t tj| ||||S r   )r   r   r   ZLinearBinary)r   r.   r+   r,   r   r!   r!   r"   linear_binary   s    z1register_onednn_fusion_ops.<locals>.linear_binaryc                 S   s&   t tj| |||||||||	|
S r   )r   r   r   ZConvolutionTransposeUnary)r   r   r   r   Zoutput_paddingr   r   r   r   r   r    r!   r!   r"   convolution_transpose_unary   s    z?register_onednn_fusion_ops.<locals>.convolution_transpose_unaryr   Zw0Zw1Zw2Zw3hxZcxreverseZbatch_sizesmodeZhidden_sizeZ
num_layersZ
has_biasesbidirectionalZbatch_firsttrainc                 S   s4   t tjtj| |||||||||	|
|||||S r   )pytreeZtree_mapr   r   r   ZMkldnnRnnLayerr1   r!   r!   r"   mkldnn_rnn_layer   s*    z4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer)Ztype_promotion_kind)r   packed_weightw_scalew_zpr   c                 S   s2   t tj| |||||||||	|
||||||S r   )r   r   r   ZQConvPointWisePT2E)r   x_scalex_zpr9   r:   r;   r   r   r   r   r   o_inv_scaleo_zero_pointoutput_dtyper   r   r    r!   r!   r"   qconvolution_unary   s*    z6register_onednn_fusion_ops.<locals>.qconvolution_unary)r   accumr9   r:   r;   r   c                 S   s~   |dkrB|t jt jfkrB| t jt jfkrB| |krBt||}ttj| |||||||||	|
|||||||||||S )Nsum)	torchfloat32bfloat16	get_dtyper   r   r   r   ZQConvPointWiseBinaryPT2E)r   r<   r=   rB   Zaccum_scaleZaccum_zpr9   r:   r;   r   r   r   r   r   r>   r?   r@   r&   alphar'   r(   unary_algorithmmr!   r!   r"   qconvolution_binary   sF    


z7register_onednn_fusion_ops.<locals>.qconvolution_binaryc                 S   s*   t tj| |||||||||	|
||S r   )r   r   r   ZQLinearPointwisePT2E)r   r<   r=   r9   r:   r;   r   r>   r?   r@   r   r   r    r!   r!   r"   qlinear_unary@  s"    z1register_onednn_fusion_ops.<locals>.qlinear_unary)r   r9   r:   r;   r   x2c                 S   s   |dkrX|	t jt jfkrD|
 t jt jfkrD|
 |	krXt|
|	}
n|
 |	ksXtdttj	| |||||||||	|
|||||||S )NrC   zCdtype of accum for qlinear post op sum should be the same as output)
rD   rE   rF   rG   r   AssertionErrorr   r   r   ZQLinearPointwiseBinaryPT2E)r   r<   r=   r9   r:   r;   r   r>   r?   r@   rL   Zx2_scaleZx2_zpr&   rH   r'   r(   rI   r!   r!   r"   qlinear_binaryb  sF    
z2register_onednn_fusion_ops.<locals>.qlinear_binaryzmkl::_mkl_linearF)Zhas_out_variantZkernel_creatorlayout)r   packed_worig_wr,   c                   s   t  r  j| ||f|d |dgng }t rzt|ddg}t| ||d^ }}} }t|| |rztj||| ||gdddgd | t	j
jkst| t	j
jkstdd	 d
d	 d}	td|| ||g||	d}
|d k	rt|
|}
|
S )N)B
batch_sizer   r   rO   T   )Ztrans_wZinput_indicesc                 S   s   t jj|   S r   r   graph	constantsget_namer   r!   r!   r"   <lambda>      zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>c                 S   s   t jj|   S r   rV   rZ   r!   r!   r"   r[     r\   )r   rU   Zpacked_linear)input_gen_fns)r   bindr   r   r   r   r   Zadd_choicesrY   r   rW   rX   rM   r   r	   )r   rQ   rR   r,   rT   rP   choicesZtransposed_w_r]   resultZaten_mkl_linearr!   r"   mkl_packed_linear  sR       	  
z5register_onednn_fusion_ops.<locals>.mkl_packed_linear) rD   Z_CZ_has_mkldnnZopsZmkldnnZ_convolution_pointwiseZ_convolution_pointwise_Z _convolution_transpose_pointwiseZ_linear_pointwiser   r8   defaultZonednnZqconv2d_pointwiser   r   binaryboolr   intZqlinear_pointwiseZbinary_tensorZhas_mklr   ZmklZ_mkl_linearr   ZMKLPackedLinearr   appendr   r
   )Zcpu_needs_realized_inputsr#   r)   r*   r-   r/   r0   r8   rA   rJ   rK   rN   rc   r!   rb   r"   register_onednn_fusion_ops   s    
	!!  
()
 >!
 
 :
3
ri   )typingr   r   rD   Ztorch.utils._pytreeutilsZ_pytreer7   Z torch._inductor.kernel.mm_commonr    r   Zcodegen.cpp_gemm_templater   r   Zloweringr	   r
   r   r   r   r   Zselect_algorithmr   r   r   r   r   Zvirtualizedr   ri   r!   r!   r!   r"   <module>   s    