U
    U?hd#                     @   s6  d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ	 d dl
Z
d dlmZmZmZmZ d dlmZ ddlmZ ddlmZ eeZG dd	 d	Zd
d Zedkr2e Zejreej ejZ ej!Z"ej#$e"re%de" d e&de" de
'e Z(ee(ej)ej*ej+dZ,e,-  e,j(.e"d dS )    N)ListTuple)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4   )	ONNXModel)attribute_to_kwargc                   @   s   e Zd ZdZdZdZdeeedddZe	e
e eeef dd	d
ZejejdddZee
e edddZe
e dddZdd ZdS )MatMulBnb4QuantizerzMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typer   r	   N)model
quant_type
block_sizec                 C   s@   |pg }|t jt jfkstt|| _|| _|| _t|| _	d S )N)
r   FP4NF4AssertionErrorr
   r   r   r   setnodes_to_exclude)selfr   r   r   r    r   `/var/www/html/venv/lib/python3.8/site-packages/onnxruntime/quantization/matmul_bnb4_quantizer.py__init__&   s    
zMatMulBnb4Quantizer.__init__)
graph_pathreturnc                 C   sL   t t|d ddD ]2}|| }|jD ]}|j| kr&||f    S q&qdS )Nr	   )NN)rangeleninitializername)r   r   gidgraphZtensorr   r   r   Z__get_initializer.   s    

z%MatMulBnb4Quantizer.__get_initializer)fpweightr   c                 C   s   t |jdkrtd|  }|j\}}|| }| j}|| d | }|d d }tj|dd}	tj||jd}
t	|	||
|| j
|| |	|
fS )z4b quantize fp32/fp16 weight   z9Current bnb4 block quantization only supports 2D tensors!r	   Zuint8)dtype)r   shape
ValueErrorZ	transposecopyr   npZzerosr$   r   r   )r   r"   Z
fpweight_trowscolsZnumelr   Z
num_blocksZquantized_numelpackedabsmaxr   r   r   bnb4_block_quant7   s    
z$MatMulBnb4Quantizer.bnb4_block_quant)nodegraph_stackr   c                 C   s  |j dkr|S td|j d |j| jkrFtd|j d |S |jd }t||\}}|dkrvtd |S tj	
|}t|jd	krtd
 |S | |\}}tj	|}	|jd |	_|jD ]}
|
j|kr|j|
  qqtj	|}|jd |_|j|	|g i }|j\}}||d< ||d< | j|d< | j|d< tjjd|jd |	j|jg|jd g|jrz|jd nddd|}td|j d |S )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeZMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...r	   Nz2MatMul doesn't have const weight. Skip to quantizer#   z)MatMul weight is not 2D. Skip to quantizeZ_Bnb4Z_absmaxKNr   r   
MatMulBnb4r    com.microsoft)inputsoutputsr   domainzcomplete quantization of )r2   )op_typeloggerdebugr   r   inputr   %_MatMulBnb4Quantizer__get_initializeronnxZnumpy_helperZto_arrayr   r%   r-   Z
from_arrayremover   extendr   r   helper	make_nodeoutput)r   r.   r/   ZinputBBZBs_graphZB_arrayr+   r,   ZB_quantr;   Zabsmax_tensorkwargsr)   r*   Zmatmul_bnb4_noder   r   r   _bnb4_matmul_node_weightM   sV    








 
	z,MatMulBnb4Quantizer._bnb4_matmul_node_weight)r/   c                 C   s  g }|d }|j D ]}dd |jD }t|ri }|jD ]}|jtjjkrh||j |j	| 
|i}nN|jtjjkrg }	|jD ] }
||
 |	| 
|g q|j	|	i}nt|}|| q8tjj|j|j|jfd|j	i|}|| || q|d |j | |  |S )Nr   c                 S   s,   g | ]$}|j tjjks$|j tjjkr|qS r   )typer=   AttributeProtoGRAPHGRAPHS).0attrr   r   r   
<listcomp>   s    z9MatMulBnb4Quantizer._process_subgraph.<locals>.<listcomp>r   r.   )r.   	attributer   rF   r=   rG   rH   appendgr   _process_subgraphrI   Zgraphsr?   r   updater@   rA   r8   r;   rB   rE   Z
ClearFieldpop)r   r/   Z	new_nodesr!   r.   Zgraph_attrsrD   rK   kvvalueZsubgraphr   r   r   rP      sD    



  
z%MatMulBnb4Quantizer._process_subgraphc                 C   sd   | j  g}| j  }d}|D ]}|jdkrd}q|sL|tjddg | | | j 	  d S )NFr4   Tr	   )
r   r!   opset_importr7   r?   r=   r@   Zmake_opsetidrP   Zclean_initializers)r   r/   rU   Zhas_ms_domainZopsetr   r   r   process   s    


zMatMulBnb4Quantizer.process)N)__name__
__module____qualname____doc__r   r   r   intr   staticmethodr   r   r   r   r<   nptZ	ArrayLiker(   Zndarrayr-   r   rE   rP   rV   r   r   r   r   r      s   7&r   c                  C   s   t jdd} | jdddd | jdddd | jd	d
dtjtjgdd | jdd
ddd | jddd
dd | jd
d | jddtd
g dd |  S )Na  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFr	   z&Quantization data type. 0: FP4, 1: NF4)r_   defaultchoicesr`   z--block_size@   zVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r_   ra   r`   z-vz	--verbose
store_true)r_   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrF   r_   ra   r`   )	argparseArgumentParseradd_argumentr   r   r   set_defaultsstr
parse_args)parserr   r   r   rn      s:    	
	rn   __main__zfile z already exists)r   T)/ri   loggingostypingr   r   numpyr(   Znumpy.typingr]   r=   Zonnx.onnx_pbr   r   r   r   Zonnxruntime.capi._pybind_stater   Z
onnx_modelr
   Zquant_utilsr   	getLoggerrW   r9   r   rn   argsrf   setLevelDEBUGZinput_modelZinput_model_pathZoutput_modelZoutput_model_pathpathexistserror	Exceptionloadr   r   r   r   ZquantrV   Zsave_model_to_filer   r   r   r   <module>   s6   
 "'

