U
    yhD                     @   sX  d dl Z d dlmZmZ d dlZd dlmZ d dlmZm	Z	 d dl
mZmZ eddZejejejejgZejejgZdd	 eD Zed
d	 eD  dd Zed eeddejeeeeejejdddZeeddejeeeeejejdddZed eeddejejejeeejejdddZ eeddejejejeeejejdddZ!ed eeddejejejejejejejdddZ"eeddejejejejejejejddd Z#ed! eed"ddd#ejeeeeejeej ejd$d%d"Z$eed"ddd#ejejejeeejeej ejd$d&d'Z%ed( eed)ddd#ejejejeeejeej ejd$d*d+Z&eed)ddd#ejejejeeejeej ejd$d,d-Z'ed. eed/ddd#ejejejejejejeej ejd$d0d1Z(eed/ddd#eej ejd2d3d4Z)ed5 eed6dejeeeejeejejf d7d8d9Z*ed: eed;dejeeeejeejejf d7d<d=Z+eed6dejeeeejeejejf d>d?d@Z,eed;dejeeeejeejejf d>dAdBZ-dCdD Z.edE eedFdejejejeeeejejdGdHdFZ/eedFdejejejeeeejejdGdIdJZ0edK eedLddd#ejejeej eeeejeej ejdM	dNdLZ1eedLddd#ejejeej eeeejeej ejdM	dOdPZ2edQ eedRdejejeejejf dSdTdRZ3eedRdejejeejejf dSdUdVZ4edW eedXdYejejeejejf dSdZdXZ5ed[ eed\dejejeejejf dSd]d\Z6eed\dejejeejejf dSd^d_Z7d`da Z8edb eedcdejejejeeejdddedcZ9eedcdejejejeeejdddfdgZ:edh eedidej;fejejejeeejejdjdkdiZ<eedidej;fejejejeeejejdjdldmZ=edn eedoddejejejeeejdddqdoZ>eedoddejejejeeejdddrdsZ?edt eeduddpej;fejejeej eeejeejdvdwduZ@edx G dydz dzejAjBZCeed{d|ejejejeeeejd}d~d{ZDeed{dejejejeeeejd}ddZEdS )    N)OptionalTuple)_unsqueeze_multiple)determine_qparamsvalidate_qmin_qmax)implLibraryZquantized_decomposedZDEFc                 C   s&   i | ]}|t |jt |jfqS  )torchZiinfominmax.0kr	   r	   V/var/www/html/venv/lib/python3.8/site-packages/torch/ao/quantization/fx/_decomposed.py
<dictcomp>   s      r   c                 C   s.   i | ]&}|t t|jt t|jfqS r	   )intr
   finfor   r   r   r	   r	   r   r      s      c                 C   s^   |t krtd| t | \}}| |ks>td| d|  ||ksZtd| d| d S )NzUnsupported dtype: z9quant_min out of bound for dtype, quant_min_lower_bound: z quant_min: z9quant_max out of bound for dtype, quant_max_upper_bound: z quant_max: )_DTYPE_TO_QVALUE_BOUNDS
ValueErrorAssertionError)	quant_min	quant_maxdtypeZquant_min_lower_boundZquant_max_upper_boundr	   r	   r   _quant_min_max_bounds_check   s    

r   zxquantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tensorZCompositeExplicitAutograd)inputscale
zero_pointr   r   r   returnc                 C   sp   | j tjtjfkr| tj} | j tjks:td| j  t||| d| }tt	| | | |||S )a   Affine quantization for the Tensor using the same quantization parameters to map
    from floating point to quantized values

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scale (float): quantization parameter for affine quantization
       zero_point (int): quantization parameter for affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    <Expecting input to have dtype torch.float32, but got dtype:       ?)
r   r
   float16bfloat16tofloat32r   r   clampround)r   r   r   r   r   r   Z	inv_scaler	   r	   r   r   &   s    ZMetac                 C   sH   | j tjtjfkr| tj} | j tjks:td| j  tj| |dS )Nr    r   )r   r
   r"   r#   r$   r%   r   
empty_liker   r   r   r   r   r   r	   r	   r   quantize_per_tensor_metaF   s    	r+   zquantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensorc                 C   sV   |  dkstd|   |  dks<td|   t| | | |||S z Affine quantization for the Tensor using the same quantization parameters to map
    from floating point to quantized values
    Same as `quantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
       >Expecting zero_point tensor to be one element, but received : 9Expecting scale tensor to be one element, but received : numelr   r   itemr*   r	   r	   r   quantize_per_tensor_tensorX   s    r3   c                 C   s   | j tjtjfkr| tj} | dks<td|  | dksZtd|  | j tjksvtd| j  tj| |dS )Nr-   r.   r/   r    r(   )	r   r
   r"   r#   r$   r%   r1   r   r)   r*   r	   r	   r   quantize_per_tensor_tensor_metaj   s    	r4   zquantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensor2c                 C   s^   |  dkstd|   |  dks<td|   t| | | | | |S r,   r0   r*   r	   r	   r   quantize_per_tensor_tensor2   s    r5   c                 C   s   t | |||||S N)r4   r*   r	   r	   r    quantize_per_tensor_tensor2_meta   s    	r7   zdequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_tensor	out_dtype)r   r   r   r   r   r   r:   r   c                C   sZ   | j |ks td| d| j  |dkr.tj}|tkrH| || | S td| dS )a   Affine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values

    Args:
       input (torch.Tensor): Tensor with dtype matching `dtype` argument,
       e.g. (`torch.uint8`), it is a per tensor quantized Tensor if combined with
       quantization parameters in the argument of this function (scale/zero_point)

       scale (float): quantization parameter for affine quantization

       zero_point (int): quantization parameter for affine quantization

       quant_min (int): minimum quantized value for input Tensor (not used in computation,
       reserved for pattern matching)

       quant_max (int): maximum quantized value for input Tensor (not used in computation,
       reserved for pattern matching)

       dtype (torch.dtype): dtype for input Tensor (not used in computation,
       reserved for pattern matching)

       out_dtype (torch.dtype?): optional dtype for output Tensor

    Returns:
       dequantized float32 Tensor
    Expecting input to have dtype: z
, but got N,Unsupported dtype in dequantize_per_tensor: )r   r   r
   r%   r   r$   r   r   r   r   r   r   r   r:   r	   r	   r   r8      s    % c                C   s   |d krt j}t j| |dS Nr(   )r
   r%   r)   r=   r	   r	   r   dequantize_per_tensor_meta   s    r?   zdequantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensorzdequantize_per_tensor.tensorc             	   C   sZ   |  dkstd|   |  dks<td|   t| | | ||||dS z Affine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values
    Same as `dequantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
    r-   r.   r/   r9   r1   r   r8   r2   r=   r	   r	   r   dequantize_per_tensor_tensor   s    rB   c                C   s   |d krt j}| dks,td|  | dksJtd|  | j|ksbtd| |tkrxt j| |dS td| d S )Nr-   r.   r/   r;   r(   r<   )r
   r%   r1   r   r   r   r)   r   r=   r	   r	   r   !dequantize_per_tensor_tensor_meta   s    rC   zdequantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensorzdequantize_per_tensor.tensor2c             	   C   sb   |  dkstd|   |  dks<td|   t| | | | | ||dS r@   rA   r=   r	   r	   r   dequantize_per_tensor_tensor2  s          rD   )r:   r   c             	   C   s   t | ||||||dS )Nr9   )rC   r=   r	   r	   r   "dequantize_per_tensor_tensor2_meta*  s    rE   zrchoose_qparams.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams.tensor)r   qminqmaxepsr   r   c              	   C   s|   | j tjtjtjfks&td| j  |tksFtdt  d| t|| t	| \}}t
|||||t|gddS )\   Given an input Tensor, derive the per tensor affine quantization parameter
    (scale and zero_point) for target quantized Tensor from the Tensor

    Args:
       input (torch.Tensor): floating point input Tensor
       quant_min (int): minimum quantized value for target quantized Tensor
       quant_max (int): maximum quantized value for target quantized Tensor
       dtype (torch.dtype): dtype for target quantized Tensor

    Returns:
       scale (float): quantization parameter for the target quantized Tensor
       zero_point (int): quantization parameter for the target quantized Tensor
    CExpecting input to have dtype torch.float32/16/b16, but got dtype: $Expecting target dtype to be one of , but got: F)has_customized_qrange)r   r
   r%   r"   r#   r   r   keysr   aminmaxr   Tensorr   rF   rG   rH   r   min_valmax_valr	   r	   r   choose_qparams_tensor;  s*    


     
 rT   z|choose_qparams_symmetric.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams_symmetric.tensorc              
   C   s   | j tjtjtjfks&td| j  |tksFtdt  d| t|| t	| \}}t
|||||t|gdtjdS )rI   rJ   rK   rL   F)rM   Zqscheme)r   r
   r%   r"   r#   r   r   rN   r   rO   r   rP   Zper_tensor_symmetricrQ   r	   r	   r   choose_qparams_symmetric_tensorb  s,    



rU   )r   r   r   rH   r   r   c                 C   sj   | j tjtjtjfks&td| j  ||k sBtd| d| tjdtj| jdtjdtj	| jdfS )NrJ   zKExpecting quant_min to be smaller than quant_max but received min:         z max: r-   r   device)
r   r
   r%   r"   r#   r   emptydoublerW   int64r   r   r   rH   r   r	   r	   r   choose_qparams_tensor_meta  s    
r\   c                 C   s(   t jdt j| jdt jdt j| jdfS )Nr-   rV   )r
   rX   rY   rW   rZ   r[   r	   r	   r   $choose_qparams_symmetric_tensor_meta  s    r]   c                 C   s6   t t|  }d||< ||d< | t|}||fS )Nr   )listrangedimpermutetuple)xaxisZnew_axis_listyr	   r	   r   _permute_to_axis_zero  s
    rf   zquantize_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_channel)r   scaleszero_pointsrd   r   r   r   r   c              	   C   s   | j tjtjfkr| tj} | j tjks:td| j  ||  k sXtd|   t||| t	| |\} }t
| }t| dD ]4}	tt| |	 d||	   ||	  ||||	< q|t|}
|
|S )au   Affine per channel quantization for the Tensor using the same quantization
    parameters for each channel/axis to map from floating point to quantized values

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (torch.Tensor): a list of scale quantization parameter for
       affine quantization, one per channel
       zero_point (torch.Tensor): a list of zero_point quantization parameter for
       affine quantization, one per channel
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    r    Expecting axis to be < r   r!   )r   r
   r"   r#   r$   r%   r   r`   r   rf   
zeros_liker_   sizer&   r'   ra   rb   )r   rh   ri   rd   r   r   r   permute_axis_listresioutr	   r	   r   rg     s    
 
c                 C   sr   | j tjtjfkr| tj} | j tjks:td| j  ||  k sXtd|   t||| tj	| |dS )Nr    rj   r(   )
r   r
   r"   r#   r$   r%   r   r`   r   r)   )r   rh   ri   rd   r   r   r   r	   r	   r   quantize_per_channel_meta  s    
rq   zdequantize_per_channel(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_channel)	r   rh   ri   rd   r   r   r   r:   r   c                C   s   | j |ks td| d| j  |dkr.tj}||  k sLtd|   t||| t| |\} }tj| |d}	t| 	dD ]6}
|dk	r||
 nd}| |
 
|| ||
  |	|
< q|	t|}|S )a   Affine per channel dequantization for the Tensor using the same quantization
    parameters for each channel/axis to map from quantized values to floating point values

    Args:
       input (torch.Tensor): Tensor with dtype matching `dtype` argument,
       e.g. (`torch.uint8`), it is a per channel quantized Tensor if combined with
       quantization parameter in the argument of this function (scales/zero_points/axis)

       scales (torch.Tensor): a list of scale quantization parameter for
       affine quantization, one per channel

       zero_points (torch.Tensor): a list of zero_point quantization parameter for
       affine quantization, one per channel

       quant_min (int): minimum quantized value for output Tensor (not used in computation,
       reserved for pattern matching)

       quant_max (int): maximum quantized value for output Tensor (not used in computation,
       reserved for pattern matching)

       dtype (torch.dtype): requested dtype for output Tensor (not used in computation,
       reserved for pattern matching)

       out_dtype (torch.dtype?): optional dtype for output Tensor

    Returns:
       dequantized float32 Tensor
    Expecting input to have dtype , but got dtype: Nrj   r(   r   )r   r   r
   r%   r`   r   rf   rk   r_   rl   r$   ra   rb   )r   rh   ri   rd   r   r   r   r:   rm   rn   ro   zprp   r	   r	   r   rr     s    (  c                C   sf   | j |ks td| d| j  |d kr.tj}||  k sLtd|   t||| tj| |dS )Nrs   rt   rj   r(   )r   r   r
   r%   r`   r   r)   )r   rh   ri   rd   r   r   r   r:   r	   r	   r   dequantize_per_channel_meta3  s     rv   zLchoose_qparams_per_token(Tensor input, ScalarType dtype) -> (Tensor, Tensor)choose_qparams_per_token)r   r   r   c                 C   sx   |   jddd}|jtjkr&| }|tjkrFd}d|d  d }ntd| |jdd		|}t
|}||fS )
  Choose quantization parameters for per token quantization. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): original float32/float16 Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor

    Returns:
        scales and zero_points, both float32 Tensors
    Tr`   Zkeepdim      r-   z/unsupported dtype in choose_qparams_per_token: gh㈵>r   )absamaxr   r
   r"   floatint8	Exceptionr&   divrk   )r   r   rh   Zn_bitsr   ri   r	   r	   r   rw   L  s    

c                 C   s6   d|  df}tj|tj| jdtj|tj| jdfS Nr-   ry   rV   rl   r
   rX   rY   rW   rZ   r   r   rl   r	   r	   r   choose_qparams_per_token_metar  s    	  r   z]_choose_qparams_per_token_asymmetric_impl(Tensor input, ScalarType dtype) -> (Tensor, Tensor))_choose_qparams_per_token_asymmetric_implZCompositeImplicitAutogradc                 C   s   d\}}t j| ddd}t j| ddd}t |t |}t |t |}t t jj}|| t	||  }	|	j
|d}	||	 }
||	 }||
 }|| }t || dk||
 || }t 
||| }|	t j|t jfS )rx   )i   ry   Trz   r}   r   )r
   Zaminr   r   rk   r   r   r%   rH   r   r&   wherer'   r$   )r   r   rF   rG   rR   rS   Zmin_val_negZmax_val_posrH   r   Zdescaled_minZdescaled_maxZzero_point_from_min_errorZzero_point_from_max_errorr   r	   r	   r   r     s&    
zWchoose_qparams_per_token_asymmetric(Tensor input, ScalarType dtype) -> (Tensor, Tensor)#choose_qparams_per_token_asymmetricc                 C   s
   t | |S r6   )r   )r   r   r	   r	   r   r     s    	c                 C   s6   d|  df}tj|tj| jdtj|tj| jdfS r   r   r   r	   r	   r   (choose_qparams_per_token_asymmetric_meta  s    	  r   c                 C   sf   t t|  d d }|| ks>td| d|  || ksbtd| d|  d S )Nry   znum_tokens: z	 scales: z zero_points: )mathprodr^   rl   r1   r   )r   rh   ri   Z
num_tokensr	   r	   r   !_per_token_quant_qparam_dim_check  s    

r   z}quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tokenr   rh   ri   r   r   r   c                 C   sB   t ||| t| || | d| | |||} | S )a  Per token quantization for the Tensor using the quantization parameters to map
    from floating point to quantized values. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (float32 torch.Tensor): quantization parameter for per token affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per token affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    r!   )r   r   muladdr'   r&   r$   r   r	   r	   r   r     s
    $c                 C   s   t ||| tj| |dS r>   r   r
   r)   r   r	   r	   r   quantize_per_token_meta	  s    	r   zdequantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, ScalarType output_dtype) -> Tensordequantize_per_tokenr   rh   ri   r   r   r   output_dtypec                 C   s   | | } |  || } | S )a  Per token dequantization for the Tensor using the quantization parameters to map
    from floating point to quantized values. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): quantized Tensor (uint8, int8 etc.)
       scales (float32 torch.Tensor): quantization parameter for per token affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per token affine quantization
       quant_min (int): minimum quantized value for input Tensor
       quant_max (int): maximum quantized value for input Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
       output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

    Returns:
       dequantized Tensor with dtype `output_dtype`
    )r$   r   r	   r	   r   r     s    c                 C   s   t ||| tj| |dS r>   r   r   r	   r	   r   dequantize_per_token_meta=  s    
r   zquantize_per_channel_group(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size) -> Tensorquantize_per_channel_group   c           	      C   s   |dkst || jd kr2|jd dkr2| jd }| jd | dksHt |  dksXt | d|}t| dkszt |dd}|dd}|d| |	 
|||| }|S )Nr-   ry   r   r|   r!   )r   shaper`   reshaper
   isnansumr   r   r'   Zclamp_r$   
reshape_as)	r   rh   ri   r   r   r   
group_sizeZto_quantZ
input_int8r	   r	   r   r   S  s*    
 	c                 C   sf   |dkst || jd kr2|jd dkr2| jd }| jd | dksHt |  dksXt tj| |dS )aX  Groupwise quantization within each channel for an 2-d Tensor using the quantization parameters
    to map from floating point to quantized values. This means for each row of a 2-d Tensor
    (M, N), we calculate scales/zero_points for each `group_size` elements
    and quantize every `group_size` elements with the same quantization parameter.
    The dimension for scales/zero_points will be (M * ceil(N, group_size),)

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    r-   ry   r   r|   r(   )r   r   r`   r
   r)   )r   rh   ri   r   r   r   r   r	   r	   r   quantize_per_channel_group_metaz  s    
r   zdequantize_per_channel_group(Tensor input, Tensor scales, Tensor? zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size, ScalarType output_dtype) -> Tensordequantize_per_channel_group)w_int8rh   ri   r   r   r   r   r   c                 C   s   |dkst || jd kr2|jd dkr2| jd }| jd | dksHt |  dksXt | d|}|dd}|dk	r|dd}	ntjg tj|jd}	||		|
| |}
|
S )a!  Groupwise dequantization within each channel for an 2-d Tensor using the quantization parameters
    to map from floating point to quantized values. This means for each row of a 2-d Tensor
    (M, N), we calculate scales/zero_points for each `group_size` elements
    and quantize every `group_size` elements with the same quantization parameter.
    The dimension for scales/zero_points will be (M * ceil(N, group_size),)

    Args:
       input (torch.Tensor): quantized Tensor (uint8/int8 etc.)
       scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
       quant_min (int): minimum quantized value for input Tensor
       quant_max (int): maximum quantized value for input Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
       output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

    Returns:
       dequantized Tensor with dtype `output_dtype`
    r-   ry   r   r|   NrV   )r   r   r`   r   r
   Zzerosint32rW   subr   r   r$   )r   rh   ri   r   r   r   r   r   Zw_int8_groupedru   Zw_dqr	   r	   r   r     s    "
zyfake_quant_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max) -> Tensorc                   @   s$   e Zd Zedd Zedd ZdS )FakeQuantPerChannelc                 C   s   |j tjkr|tj}|j tjkr0|tj}|j tjksLtd|j  || k sjtd|  ttd|tt|d |j	 }t
||}t
||}	t|d|  |	 }
t|
|||	 | }t|
|k|
|k}| | |S )Nr    rj   r   r-   r!   )r   r
   r%   r$   r   r   r`   r^   r_   ndimr   r'   r&   logical_andZsave_for_backward)ctxr   rh   ri   rd   r   r   Zbroadcast_dimsZunsqueeze_scalesZunsqueeze_zero_pointstemprp   maskr	   r	   r   forward  s    "


zFakeQuantPerChannel.forwardc                 C   s   | j \}|| d d d d d fS r6   )Zsaved_tensors)r   gyr   r	   r	   r   backward  s    zFakeQuantPerChannel.backwardN)__name__
__module____qualname__staticmethodr   r   r	   r	   r	   r   r     s   
r   fake_quant_per_channelZAutograd)r   rh   ri   rd   r   r   r   c                 C   s   t | |||||S r6   )r   applyr   rh   ri   rd   r   r   r	   r	   r   r     s    	c                 C   s
   t | S r6   )r
   r)   r   r	   r	   r   fake_quant_per_channel_meta   s    	r   )r   )r   )Fr   typingr   r   r
   Ztorch._refsr   Ztorch.ao.quantization.utilsr   r   Ztorch.libraryr   r   Zquantized_decomposed_libZuint8r   Zint16r   Z_INTEGER_DTYPESZfloat8_e5m2Zfloat8_e4m3fnZ_FLOAT_DTYPESr   updater   definerP   r   r   r   r   r+   r3   r4   r5   r7   r8   r?   rB   rC   rD   rE   rT   rU   r\   r]   rf   rg   rq   rr   rv   rw   r   r   r   r   r   r   r   r%   r   r   r   r   r   ZautogradFunctionr   r   r   r	   r	   r	   r   <module>   s0  







	/
	
	
	
	
	
"
)



,


9

!
+


"

 
  
 $
 %.


