U
    yh,$                     @   s6  d dl Z d dlmZ d dlmZ ddlmZ ddlmZmZ ddl	m
Z
mZmZ ddlmZ dd	lmZ d d
lmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlm Z  dddgZ!eeedddZ"eeedddZ#e j$j%j&j'e j$j%j&j(e j$j%j)j'gZ*ee+dddZ,dee+e+edddZ-dS )    N)GraphModule)Node   )prepare)_fuse_conv_bn_qat_fold_conv_bn_qat)_get_node_name_to_scope_fuse_conv_bn__disallow_eval_train) reference_representation_rewrite)#_convert_to_reference_decomposed_fx)	QuantizerQuantizationSpecBaseQuantizationSpecFixedQParamsQuantizationSpecSharedQuantizationSpecDerivedQuantizationSpecQuantizationAnnotation)PassManager)DuplicateDQPass)PortNodeMetaForQDQ)constant_foldprepare_pt2eprepare_qat_pt2econvert_pt2e)model	quantizerreturnc                 C   sf   t jd | j}t| }t|  ||  ||  ||  t	| |dd} | j
| t| } | S )aR  Prepare a model for post training quantization

    Args:
      * `model` (torch.fx.GraphModule): a model captured by `torch.export` API
        in the short term we are using `torch._export.capture_pre_autograd_graph`,
        in the long term we'll migrate to some `torch.export` API
      * `quantizer`: A backend specific quantizer that conveys how user want the
        model to be quantized. Tutorial for how to write a quantizer can be found here:
        https://pytorch.org/tutorials/prototype/pt2e_quantizer.html

    Return:
      A GraphModule with observer (based on quantizer annotation), ready for calibration

    Example::

        import torch
        from torch.ao.quantization.quantize_pt2e import prepare_pt2e
        from torch._export import capture_pre_autograd_graph
        from torch.ao.quantization.quantizer import (
            XNNPACKQuantizer,
            get_symmetric_quantization_config,
        )

        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

           def forward(self, x):
               return self.linear(x)

        # initialize a floating point model
        float_model = M().eval()

        # define calibration function
        def calibrate(model, data_loader):
            model.eval()
            with torch.no_grad():
                for image, target in data_loader:
                    model(image)

        # Step 1. program capture
        # NOTE: this API will be updated to torch.export API in the future, but the captured
        # result shoud mostly stay the same
        m = capture_pre_autograd_graph(m, *example_inputs)
        # we get a model with aten ops

        # Step 2. quantization
        # backend developer will write their own Quantizer and expose methods to allow
        # users to express how they
        # want the model to be quantized
        quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
        m = prepare_pt2e(m, quantizer)

        # run calibration
        # calibrate(m, sample_inference_data)
    z+quantization_api.quantize_pt2e.prepare_pt2eFZis_qat)torch_C_log_api_usage_oncemetar   r	   transform_for_annotationannotatevalidater   updater
   r   r   original_graph_metaZnode_name_to_scope r)   U/var/www/html/venv/lib/python3.8/site-packages/torch/ao/quantization/quantize_pt2e.pyr   &   s    =


c                 C   sf   t jd | j}t| }||  ||  ||  t|  t	| |dd} | j
| t| } | S )a_  Prepare a model for quantization aware training

    Args:
      * `model` (torch.fx.GraphModule): see :func:`~torch.ao.quantization.quantize_pt2e.prepare_pt2e`
      * `quantizer`: see :func:`~torch.ao.quantization.quantize_pt2e.prepare_pt2e`

    Return:
      A GraphModule with fake quant modules (based on quantizer annotation), ready for
      quantization aware training

    Example::
        import torch
        from torch.ao.quantization.quantize_pt2e import prepare_qat_pt2e
        from torch._export import capture_pre_autograd_graph
        from torch.ao.quantization.quantizer import (
            XNNPACKQuantizer,
            get_symmetric_quantization_config,
        )

        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

           def forward(self, x):
               return self.linear(x)

        # initialize a floating point model
        float_model = M().eval()

        # define the training loop for quantization aware training
        def train_loop(model, train_data):
            model.train()
            for image, target in data_loader:
                ...

        # Step 1. program capture
        # NOTE: this API will be updated to torch.export API in the future, but the captured
        # result shoud mostly stay the same
        m = capture_pre_autograd_graph(m, *example_inputs)
        # we get a model with aten ops

        # Step 2. quantization
        # backend developer will write their own Quantizer and expose methods to allow
        # users to express how they
        # want the model to be quantized
        quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
        m = prepare_qat_pt2e(m, quantizer)

        # run quantization aware training
        train_loop(prepared_model, train_loop)

    z/quantization_api.quantize_pt2e.prepare_qat_pt2eTr   )r   r    r!   r"   r   r#   r$   r%   r   r   r&   r
   r'   r)   r)   r*   r   r   s    9


)nr   c                 C   s   | j dko| jtkS )aT  If there is any pure ops between get_attr and quantize op they will be const propagated
    e.g. get_attr(weight) -> transpose -> quantize -> dequantize*
    (Note: dequantize op is not going to be constant propagated)

    This filter is added because we don't want to constant fold the things that are not
    related to quantization
    Zcall_function)optarget
_QUANT_OPS)r+   r)   r)   r*   _quant_node_constraint   s    r/   FT)r   use_reference_representationfold_quantizer   c                 C   s   t jd t|ts&td| d| j}t| } t| } t	t
 g}|| j} t	t g}|| j} |rvt| t |rt| } | j| t| } | S )a  Convert a calibrated/trained model to a quantized model

    Args:
      * `model` (torch.fx.GraphModule): calibrated/trained model
      * `use_reference_representation` (bool): boolean flag to indicate whether to produce referece representation or not
      * `fold_quantize` (bool): boolean flag for whether fold the quantize op or not

    Returns:
        quantized model, either in q/dq representation or reference representation

    Example::

        # prepared_model: the model produced by `prepare_pt2e`/`prepare_qat_pt2e` and calibration/training
        # `convert_pt2e` produces a quantized model that represents quantized computation with
        # quantize dequantize ops and fp32 ops by default.
        # Please refer to
        # https://pytorch.org/tutorials/prototype/pt2e_quant_ptq_static.html#convert-the-calibrated-model-to-a-quantized-model
        # for detailed explanation of output quantized model
        quantized_model = convert_pt2e(prepared_model)

    z+quantization_api.quantize_pt2e.convert_pt2ezjUnexpected argument type for `use_reference_representation`, please make sure you intend to pass argument z to convert_pt2e)r   r    r!   
isinstancebool
ValueErrorr"   r   r   r   r   Zgraph_moduler   r   r/   r   r&   r
   )r   r0   r1   r(   pmr)   r)   r*   r      s&    




)FT).r   Ztorch.fxr   r   Zpt2e.preparer   Zpt2e.qat_utilsr   r   Z
pt2e.utilsr   r	   r
   Zpt2e.representationr   Zquantize_fxr   Ztorch.ao.quantization.quantizerr   r   r   r   r   r   r   Z"torch.fx.passes.infra.pass_managerr   Z,torch.ao.quantization.pt2e.duplicate_dq_passr   Z-torch.ao.quantization.pt2e.port_metadata_passr   Z%torch._export.passes.constant_foldingr   __all__r   r   ZopsZquantized_decomposedZquantize_per_tensordefaultZtensorZquantize_per_channelr.   r3   r/   r   r)   r)   r)   r*   <module>   sJ   $	MI


  