U
    yhf                     @   s  d dl Z d dlmZ ddlmZ ddlmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlmZ dgZe jddde jde jde jde jde jde j dge jde j dge jde jddde jde jde jde jde jde j dge jde j dge jde jde jde jde jde jde jde j dge jde j dge jdfZ!dd Z"dd Z#e jde jddde $e j%j&e jddde jde jde jde jde jde j dge jde j dge jde jde jdf
Z'dd Z(dd Z)e jddde jde jde jde jde jde j dge jde j dge jde jddde jde jde jde jde jde j dge jde j dge jde jde jde jde jde jde jde j dge jde j dge jdfZ*dd Z+dd  Z,e jddde jde jde jde jde jde jddde jde jde jde jde jde jde jde jde jde j dge jde j dge jdf
Z-d!d" Z.d#d$ Z/d%d& Z0d'd( Z1e jddde jde jde jde jde jde j dge jde j dge jde jde jde jde jde j dge jde j dge jdf	Z2d)d* Z3d+d, Z4e jdd-d-d-e jde jde jde jde jde j dge jde j dge jdfZ5d.d/ Z6d0d1 Z7e jddde jde jde jde jde jde j dge jde j dge jdfZ8d2d3 Z9d4d5 Z:e jdd-d-d-e jde jd-e jde jd-e jddddfZ;d6d7 Z<d8d9 Z=e jddde jde jd-e jde jd-e jddddfZ>d:d; Z?d<d= Z@e jAjd>d?d@ZBeG dAdB dBZCeCe'ee(ee)ee	dddde $e j%j&d-idCee	dddde $e j%j&d-idCeCe!ee"ee#eeeCe*ee+ee,eedDgdEeedDgdEeCe-ee.ee/eCe-ee0ee1eCe2ee3ee4eeeCe5ee6ee7eCe8ee9ee:eCe;ee<ee=eBeBeCe>ee?ee@eBeBg
ZDeedFdGdZEdS )H    N)GraphModule   )_WrapperModule)"_get_aten_graph_module_for_pattern"remove_tensor_overload_for_qdq_ops'_replace_literals_with_new_placeholders,_replace_literals_with_existing_placeholders)quantized_decomposed_lib)replace_pattern)	out_dtype)OptionalCallableTupleAny)	dataclass)partial reference_representation_rewrite   )r      Zdtype   )r   r   ic                 C   sf   t jj| ||||t j}t jj|||||	t j}t jjj|||
}t jj|||||t j}|S N)	torchopsquantized_decomposeddequantize_per_tensorint8atenlineardefaultquantize_per_tensor)x_i8x_scalex_zero_pointx_quant_minx_quant_max	weight_i8weight_scaleweight_zero_pointweight_quant_minweight_quant_max	bias_fp32	out_scaleout_zero_pointout_quant_minout_quant_maxx_fp32weight_fp32out_fp32out_i8 r5   c/var/www/html/venv/lib/python3.8/site-packages/torch/ao/quantization/pt2e/representation/rewrite.py_qdq_quantized_linear*   s4                   r7   c                 C   s   t jj| ||} t jj|||	}| t j}|t j}tt jjjjt j	|| || d }|| }tt jjj
jt j	|
|}|| }tt jjjjt j	||| | | }t jj|||t j}|S r   )r   r   r   clamptoint16r   r   r    int32divTensormulr   )r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   x_i16
weight_i16acc_i32
bias_scalebias_i32r4   r5   r5   r6   _reference_quantized_linear9   s"    	
$rD   c
                 C   s   t jj| |||t j\}
}t jj| |
|||t j}t jj||
|||t j} t jj|||||t j}t jjj	| ||	}|S r   )
r   r   r   choose_qparamsr   r!   r   r   r   r    )r1   r%   r&   x_epsr'   r(   r)   r*   r+   r,   r#   r$   r"   r2   r3   r5   r5   r6   _qdq_dynamic_quantized_linearh   s6                   rG   c
                 C   s   t jj| |||t j\}
}| |
 } t | } | jt jd}|| }t |||}|jt jd}t jj	|||}|t j
}|t j
}tt jj	jjt j|| || d }|
| }tt jj	jjt j|	|}|| }||
|  }|S Nr   )r   r   r   rE   r   roundr9   r;   r8   r   r:   r   r   r    r<   r=   )r1   r%   r&   rF   r'   r(   r)   r*   r+   r,   r#   r$   x_i32r"   r?   r@   rA   rB   rC   r3   r5   r5   r6   #_reference_dynamic_quantized_linearw   s,    

rK   )r      rL   rL   c                 C   s   ddg}ddg}ddg}d}ddg}d}t jj| ||||t j}t jj|||||	t j}t jjj|||
||||||	}t jj|||||t j}|S Nr   r   F)	r   r   r   r   r   r   convolutionr    r!   )r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   stridepaddingdilation
transposedoutput_paddinggroupsr1   r2   r3   r4   r5   r5   r6   _qdq_quantized_conv2d   sT              
             rU   c                 C   s  ddg}ddg}ddg}d}ddg}d}t jj| ||} t jj|||	}| t j}|t j}tt jjjjt j	|| || d ||||||}|| }tt jjj
jt j	|
|}|d}|d}|| }tt jjjjt j	||| | | }t jj|||t j}|S )Nr   r   F)r   r   r   r8   r9   r:   r   rN   r    r;   r<   r=   Z	unsqueezer>   r   )r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   rO   rP   rQ   rR   rS   rT   r?   r@   rA   rB   rC   r4   r5   r5   r6   _reference_quantized_conv2d   sL    
      


   
rW   c
                 C   sh   t jj| ||||	t j}
t jj|||||	t j}|
| }t jj|}t jj|||||	t j}|S r   )r   r   r   r   r   r   Zrelur!   r"   r#   r$   y_i8y_scaley_zero_pointr-   r.   	quant_min	quant_maxr1   Zy_fp32r3   r4   r5   r5   r6   _qdq_quantized_add_relu
  s         r^   c
                 C   s   |  tj}
| tj}ttjjjjtj|
| || }
ttjjjjtj|| || }|
| | }tjj|||	 tj	}|S )z
    See comments for `_reference_quantized_add` for more information on
    how to derive the formula for out_i8 based on x_i8 and y_i8
    )
r9   r   r;   r   r   r   r>   r=   r8   r   r"   r#   r$   rY   rZ   r[   r-   r.   r\   r]   rJ   Zy_i32out_i32r4   r5   r5   r6   _reference_quantized_add_relu  s      ra   c
                 C   sZ   t jj| ||||	t j}
t jj|||||	t j}|
| }t jj|||||	t j}|S r   )r   r   r   r   r   r!   rX   r5   r5   r6   _qdq_quantized_add)  s         rb   c
                 C   s   |  tj}
| tj}t|| |
|   tj}
t|| ||   tj}|
| | }d}d}	tjj|||	 tj}|S )a  
    # How to Derive the formula for out_i8 based on x_i8 and y_i8
    # (since quantized add takes x_i8, y_i8 and their quantization parameters, and produce an out_i8)

    # out_i8 is quantized output, we can write down the formula for it first:
out_i8 = out_f32 / out_scale + out_zero_point           (1)

    # then out_fp32 is computed from x_f32 + y_f32, and the x_fp32 and y_fp32 are the dequantized x_i8 and y_i8
    out_f32 = x_f32 + y_f32           (2)
    x_fp32 = (x_i8 - x_zero_point) * x_scale         (3)
    y_fp32 = (y_i8 - y_zero_point) * y_scale         (4)

    # applying the above fomula to the out_i8 equation we can get the following:
    out_i8 = out_fp32 / out_scale + out_zero_point             # (1)
       = (x_f32 + y_f32) / out_scale + out_zero_point      # applying (2) to substitute out_fp32 with x_fp32 + y_fp32
       = ((x_i8 - x_zero_point) * x_scale + (y_i8 - y_zero_point) * y_scale) / out_scale + out_zero_point  # apply (3) and (4)
    r   r   )r9   r   r;   rI   r   r   r8   r   r_   r5   r5   r6   _reference_quantized_add2  s    rc   c	                 C   sj   d}	d}
d}d}d}t jj| ||||t j}t jjj||	|
|||\}}t jj|||||t j}|S rM   )	r   r   r   r   r   r   max_pool2d_with_indicesr    r!   )r"   r#   r$   r%   r&   r-   r.   r/   r0   kernel_sizerO   rP   rQ   	ceil_moder1   r3   _r4   r5   r5   r6   _qdq_quantized_max_pool2d^  s          rh   c	                 C   s~   d}	d}
d}d}d}t | ||} | t j}t jjj|| |	|
|||\}}|||  | }t |||}|t j}|S rM   )	r   r8   r9   r;   r   r   rd   r    r   )r"   r#   r$   r%   r&   r-   r.   r/   r0   re   rO   rP   rQ   rf   rJ   r`   rg   r3   r4   r5   r5   r6   _reference_quantized_max_pool2dk  s&    
ri   rL   c                 C   s   t jj| ||||t j}|S r   )r   r   r   r!   r   r1   scale
zero_pointr\   r]   xr5   r5   r6   _quantize_per_tensor_int8  s    rn   c                 C   sH   | | }t |}|jt jd}|| }t |||}|jt jd}|S rH   )r   rI   r9   r;   r8   r   rj   r5   r5   r6   #_reference_quantize_per_tensor_int8  s    
ro   c                 C   s   t jj| ||||t j}|S r   )r   r   r   r   r   )r"   rk   rl   r\   r]   r1   r5   r5   r6   _dequantize_per_tensor_int8  s    rp   c                 C   s0   t jj| ||} | t j| | jt jdS rH   )r   r   r   r8   r9   float32)r"   rk   rl   r\   r]   r5   r5   r6   %_reference_dequantize_per_tensor_int8  s    rr   c              	   C   s    t jj| |||||t j}|S r   )r   r   r   Zquantize_per_channelr   )r1   scaleszero_pointsch_axisr\   r]   r4   r5   r5   r6   _quantize_per_channel_int8  s          rv   c                 C   sP   t | |d} t jjt | | t j| ||}t ||d}|t jS NrV   )	r   	transposer   r   r8   rI   r9   r;   r   )r1   rs   rt   ru   r\   r]   r`   r5   r5   r6   $_reference_quantize_per_channel_int8  s    (ry   c              	   C   s    t jj| |||||t j}|S r   )r   r   r   Zdequantize_per_channelr   )r"   rs   rt   ru   r\   r]   r3   r5   r5   r6   _dequantize_per_channel_int8  s          rz   c                 C   sR   t jj| ||} t | |d} | t j}|| t j| }t ||d}|S rw   )r   r   r   r8   rx   r9   r;   float)r"   rs   rt   ru   r\   r]   rJ   r3   r5   r5   r6   &_reference_dequantize_per_channel_int8  s    r|   Zgmc                 C   s   t | dgdddddS )NrV   rL      r   )r   r   r   )exclude_literalsliteral_to_ph_idx)r   r}   r5   r5   r6   '_replace_ph_qdq_per_channel_replacement  s
    
r   c                   @   sf   e Zd ZU dZeedf ed< eed< eed< dZe	ee
ge
f  ed< dZe	ee
ge
f  ed< dS )	_RewriteInfozData needed for rewrite, this includes example inputs, pattern and replacement functions
    and post transformation functions for the exported pattern and replacement GraphModule
    .example_inputspatternreplacementNpattern_post_transreplacement_post_trans)__name__
__module____qualname____doc__r   r   __annotations__r   r   r   r   r   r5   r5   r5   r6   r     s   
r   )r   rV   )r   )modelreturnc                 C   s   t |  tD ]z}|j}|j}|j}|j}|j}t||}t | t||}t | |r^||}|rj||}|  |  t	| ||}q| S r   )
r   _REWRITE_INFO_LISTr   r   r   r   r   r   Z	recompiler
   )r   Zrewrite_infor   r   r   r   r   matchesr5   r5   r6   r   F  s&    

)Fr   Ztorch.fxr   Zexport_utilsr   utilsr   r   r   r   Z$torch.ao.quantization.fx._decomposedr	   Ztorch.fx.subgraph_rewriterr
   Z!torch._higher_order_ops.out_dtyper   typingr   r   r   r   dataclassesr   	functoolsr   __all__randintr   Zrandnr{   ZzerosintZtensorZ _QUANTIZED_LINEAR_EXAMPLE_INPUTSr7   rD   Zfinforq   epsZ(_DYNAMIC_QUANTIZED_LINEAR_EXAMPLE_INPUTSrG   rK   Z _QUANTIZED_CONV2d_EXAMPLE_INPUTSrU   rW   Z)_QUANTIZED_ADD_OR_ADD_RELU_EXAMPLE_INPUTSr^   ra   rb   rc   Z$_QUANTIZED_MAX_POOL2D_EXAMPLE_INPUTSrh   ri   Z(_QUANTIZE_PER_TENSOR_INT8_EXAMPLE_INPUTSrn   ro   Z*_DEQUANTIZE_PER_TENSOR_INT8_EXAMPLE_INPUTSrp   rr   Z)_QUANTIZE_PER_CHANNEL_INT8_EXAMPLE_INPUTSrv   ry   Z+_DEQUANTIZE_PER_CHANNEL_INT8_EXAMPLE_INPUTSrz   r|   Zfxr   r   r   r   r5   r5   r5   r6   <module>   s  "&:	!		      O