U
    zhg                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlm Z  d d	l!m"Z"m#Z# d
dl$m%Z% d
dlm&Z&m'Z' e(e)Z*edZ+edZ,e,- Z.e.r8d dl/Z0g Z1dZ2e.r|ddddgZ1e0j3j4j56 7ddZ8d9dd e1D Z2dddgZ:G dd dZ;dd Z<d Z=G d!d" d"Z>e?dd#d$ Z@d%d&d'd(ZAd)d* ZBd+d, ZCG d-d. d.eDZEd/d0 ZFdQd1d2ZGdRd%d%d3d4d5ZHd6d7 ZId8d9 ZJd:d; ZKdSd%d%d3d<d=ZLed> d?d>d@dAdBZMe+eee+ ge+f dCdDdEZNeNejOZPeNeQdFZReNd ZSeNd%ZTeNd%ZUG dGdH dHZVG dIdJ dJZWG dKdL dLZXdTeee gee f eYeeeYeZf  eeZ eeYef dNdOdPZ[dS )U    N)Counter)import_module)AnyCallableDictListOptionalTypeVar)Tensor)rand_strided)is_float_dtype)StorageWeakRef)ContentStoreReaderContentStoreWriter   )config)clone_inputsget_debug_dirTztorch._inductor.config z1//caffe2/torch/fb/sparsenn:sparsenn_operators_gpuz-//caffe2/torch/fb/sparsenn:sparsenn_operatorsz///deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpuz+//deeplearning/fbgemm/fbgemm_gpu:sparse_opszfbcode://
c                 C   s   g | ]}d | dqS )ztorch.ops.load_library("z") .0xr   r   K/var/www/html/venv/lib/python3.8/site-packages/torch/_dynamo/debug_utils.py
<listcomp>6   s     r   Zbuck2runz@mode/dev-nosanc                   @   s&   e Zd Zdd Zdd Zd	ddZdS )
BuckTargetWriterc                 C   s   t jt j|\| _| _| jdd| _| jdd d| j | _| j| jdd  | _| jdd  | _| j}||dd  dd  }d| d	| j | _	d S )
Nz.pyr   /.zfbcode.   zfbcode/r   :)
ospathsplitabspathsubdirpy_filereplacetargetfindcmd_line_path)selffilenametmpr   r   r   __init__=   s    zBuckTargetWriter.__init__c                 C   sD   d dd tD }td| j d| j dt d| d| j d	S )
Nr   c                 S   s   g | ]}d | dqS )z	        "z",r   r   r   r   r   r   L   s     z*BuckTargetWriter.build.<locals>.<listcomp>za
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")

python_binary(
    name="z",
    srcs = ["z"],
    compile = False,
    deps = [
        "//caffe2:torch",
        "//caffe2/functorch:functorch",
        "//triton:triton",
        "z",
    ],
    cpp_deps = [
z
    ],
    main_module = "z",
    par_style = "xar",
)
)join
extra_depstextwrapdedentr+   r)   
cur_targetr%   )r.   Zextra_cpp_depsr   r   r   buildK   s    zBuckTargetWriter.buildTc              	   C   sZ   t j| jd}t|d}||   W 5 Q R X t| jg }|rVt	
dd| |S )NZTARGETSwzFFound an example that reproduces the error. Run this cmd to repro - %s )r$   r%   r2   r(   openwriter7   BUCK_CMD_PREFIXr-   logwarning)r.   Z	print_msgZtarget_filefdZ	cmd_splitr   r   r   r;   d   s    zBuckTargetWriter.writeN)T)__name__
__module____qualname__r1   r7   r;   r   r   r   r   r   <   s   r   c                  C   sL   t jt d} | d kr.t  dt  } t j| sHt j	| dd | S )NZminifierz
/minifier_T)exist_ok)
r$   r%   r2   r   tempfile
gettempdirgetpassgetuserexistsmakedirs)r%   r   r   r   minifier_dirr   s    rJ      c                   @   s   e Zd Zejjejjejjejjejj	ejj
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjgZedd Zedd ZdS )NNModuleToStringc                 C   sL   t  }|  D ] \}}t|tjkr|| qt|dkrHtd| dS )Nr   z-We have not tested reprs of some modules - %sT)	setnamed_childrentyperL   
safe_reprsaddlenr=   r>   )gmZcant_convert_moduler   r   r   can_convert_to_string   s    z&NNModuleToString.can_convert_to_stringc                 C   s  ddl m} d}td}|  D ]V\}}|  }t| d }|d k	rZ|jrZ| d}||d  d| d| d	7 }q"| j	
 D ]\}}	|	d krq|	 tkrdd
lm}
 |
jtkstt|	}n@t|	rdt|	j d|	j d}ndt|	j d|	j d}|	jr| d}||d  d| d| d7 }q| j
 D ]d\}}|d krXqBd}|jrhd}dt|j d|j | d}||d  d| d| d	7 }qB||| jd d	7 }|S )Nr   )
_addindent    z
            from torch.nn import *
            class Repro(torch.nn.Module):
                def __init__(self):
                    super().__init__()
            z.cuda()   zself.z = r   )
PRINT_OPTSztorch.randn(z, dtype=)ztorch.randint(1, size=zself.register_buffer('z', z)
r   z, device="cuda"ztorch.nn.Parameter(torch.randn(z))rK   )Ztorch.nn.modules.modulerW   r4   r5   rN   __repr__next
parametersZis_cuda_buffersitemsZnumelMAX_CONSTANT_NUMEL_INLINEZtorch._tensor_strrZ   	thresholdAssertionErrorreprtorchis_floating_pointlistshapedtype_parameterscode)rS   rW   tab	model_strmodule_namerU   Z
module_strZexample_paramZbuffer_namebufferrZ   Z
tensor_str
param_nameparammaybe_devicer   r   r   convert   sF    	

 


 
"	zNNModuleToString.convertN)r@   rA   rB   re   nnZLinearZConv1dZConv2dZConv3dZBatchNorm1dZBatchNorm2dZBatchNorm3dZ	LayerNormZDropoutZSoftmaxZReLUZGELUZIdentityZ	MaxPool2dZ	EmbeddingZTanhZConvTranspose1dZGLUZLSTMZFlattenZAdaptiveAvgPool2drP   staticmethodrV   rs   r   r   r   r   rL   ~   s4   
rL   c               	   C   s   t j sdS d} zBtddg}| d}ddd |D }| | d7 } W n" ttj	fk
rv   | d	7 } Y nX t
d
d tt j D }| d7 } | D ]\}}| d| d| d7 } q| d7 } | S )Nz:# torch.cuda.is_available()==False, no GPU info collected
z# CUDA Info: 
Znvccz	--versionr   r   c                 S   s    g | ]}|d krd| dqS ))r   #  
r   )r   sr   r   r   r      s      z-_cuda_system_info_comment.<locals>.<listcomp>z# nvcc not found
c                 s   s   | ]}t j|V  qd S N)re   cudaZget_device_name)r   ir   r   r   	<genexpr>   s    z,_cuda_system_info_comment.<locals>.<genexpr>z# GPU Hardware Info: 
rv   z : rw   )re   rz   Zis_available
subprocesscheck_outputdecoder&   r2   FileNotFoundErrorCalledProcessErrorr   rangeZdevice_countr`   )rm   Zcuda_version_outZcuda_version_linescommentZ	gpu_namesnamecountr   r   r   _cuda_system_info_comment   s$    
r   F)stable_outputc              	   C   s\   dd l }dd l}| rdS |jjj }d|jj  d|jj  d|j	j  d| d	S )Nr   z*# config omitted due to stable_output=Truez~import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config
r   )
Ztorch._functorch.configZtorch._inductor.configZfxZexperimental_configZcodegen_config_dynamor   Z	_inductorZ
_functorch)r   re   Zexperimental_configr   r   r   generate_config_string   s    


r   c                   C   s   t jt dS )Nzminifier_launcher.py)r$   r%   r2   rJ   r   r   r   r   get_minifier_repro_path  s    r   c              
   C   s   t  }td| tr"t|  z$t|d}||  W 5 Q R X W n6 tk
r| } ztd t	d|W 5 d }~X Y nX d S )NzWriting minified repro to:
%sr8   r   z(Could not write to {minified_repro_path})
r   r=   r>   use_buckr   r;   r:   OSError	exceptionNotImplementedError)contentsZminified_repro_pathr?   er   r   r   helper_for_dump_minify  s    
r   c                   @   s   e Zd ZdS )AccuracyErrorN)r@   rA   rB   r   r   r   r   r   "  s   r   c                 C   sB   t | }tt| D ](}t|| tjr|| | | j q|S )z
    This clone inputs is different from utils clone_input. In case of minifier,
    all the tensors are leaf tensors while creating a new graph. So, we set the
    requires_grad field w/o checking the leafness of the tensor.
    )r   r   rR   
isinstancere   r
   Zrequires_grad_requires_grad)example_inputsZcloned_inputsidxr   r   r   clone_inputs_retaining_gradness&  s
    r   c           	      C   s   ddl m}m}m} t| } |s*t|}t| dr>| d t| drP| |n| | }|r`|S ||rx||}|	  || |d|S )z
    Runs a forward and possibly backward iteration for a given mod and args.

    When disable_clone is True, we will use args as-is without cloning.
    This is higher fidelity but we may destroy the args in the process.
    r   )collect_resultsreduce_to_scalar_lossrequires_bwd_pass	zero_gradTZ_boxed_callN)
Ztestingr   r   r   copydeepcopyr   hasattrr   Zbackward)	rS   argsonly_fwdZdisable_cloner   r   r   outZlossr   r   r   run_fwd_maybe_bwd3  s    


r   require_fp64ignore_non_fpc             
   C   s   ddl m} t| ||}d}tjrvz(tt| t|\}	}
t|	|
|}W n* t	k
rt   |rft
dtd Y nX zt|||}W n2 t	k
r } ztd W Y dS d}~X Y nX ||||tjd|d}|S )aa  
    Check two models have same accuracy.

    require_fp64: if True, raise an error if we unable to calculate the fp64 reference
    ignore_non_fp: if True, do not compare outputs which are not floating point.  This
        is mostly useful for the minifier (which wants to avoid quantizing floating point
        error into integer/boolean error)
    r   )sameNzCould not generate fp64 outputszWhile minifying the program in accuracy minification mode, ran into a runtime exception which is likely an unrelated issue. Skipping this graph.T)ZtolZ	equal_nanr   )utilsr   r   r   Zsame_two_models_use_fp64cast_to_fp64r   r   r   	ExceptionRuntimeErrorr=   r>   r   Zrepro_tolerance)rS   Zopt_gmr   r   r   r   r   refZfp64_refZ
fp64_modelZfp64_examplesresr   Zpassingr   r   r   same_two_modelsN  s<     r   c                 C   s   | j jD ]}|jdkrj|jtjjjjkrjt	|j
dks:tt|j
d rj|j
d tjkrj|j
d tjf|_
|jdkr|jd}|d k	rt|rt|j}tj|d< ||_q| j   |   | S )NZcall_functionrY   r   r   ri   )graphZnodesopr+   re   ZopsZprimsZconvert_element_typedefaultrR   r   rc   r   float64kwargsgetdictZlintZ	recompile)modelnoderi   Z
new_kwargsr   r   r   cast_dtype_args_to_fp64  s"    



r   c                    sB   ddl m} | } tjkr(t|}| fdd|}||fS )Nr   )tree_mapc                    s"   t | tjr|  r|  S | S ry   )r   re   r
   rf   tor   ri   r   r   <lambda>  s
    
zcast_to.<locals>.<lambda>)Ztorch.utils._pytreer   r   re   r   r   )ri   r   inputsr   r   r   r   cast_to  s    


r   c                 C   s   t tj| |S ry   )r   re   r   )r   r   r   r   r   r     s    r   c             
   C   sd   z,|t | t|}t| |||||d W S  tk
r^ } ztd W Y dS d }~X Y nX d S )Nr   zWhile minifying the program in accuracy minification mode, ran into a runtime exception which is likely an unrelated issue. Skipping this graphF)r   r   r   r   r   r=   r   )rS   r   Zcompiler_fnr   r   r   Zcompiled_gmr   r   r   r   backend_accuracy_fails  s$    	 
r   ztorch._prims_common.StrideTypeztorch._prims_common.ShapeType)striderh   returnc                C   s   | d k	r| S t |S ry   )r   Zmake_contiguous_strides_for)r   rh   r   r   r   _stride_or_default  s    r   )dr   c                    s    fddS )Nc                    s   | d k	r| S  S ry   r   r   r   r   r   r         z_mk_defaulter.<locals>.<lambda>r   r   r   r   r   _mk_defaulter  s    r   cpuc                   @   s4   e Zd Zdd ZdddddZdd Zd	d
 ZdS )NopInputReaderc                 C   s
   d| _ d S )Nr   total)r.   r   r   r   r1     s    zNopInputReader.__init__Ndevice
dtype_hintc                C   s   |  j d7  _ d S )Nr   r   )r.   storage_hashnbytesr   r   r   r   r   storage  s    zNopInputReader.storagec                 O   s   d S ry   r   r.   r   r   r   r   r   tensor  s    zNopInputReader.tensorc                 O   s   d S ry   r   r   r   r   r   symint  s    zNopInputReader.symintr@   rA   rB   r1   r   r   r   r   r   r   r   r     s   r   c                   @   sJ   e Zd ZdddddZdddddZddddddd	d
Zdd ZdS )InputReaderN)pbarc                C   s8   |d krt d |d k	r"t|nd | _g | _|| _d S )Nz0no save_dir specified, will generate random data)r=   r>   r   storer   r   )r.   save_dirr   r   r   r   r1     s
    
zInputReader.__init__r   c                C   s   | j d k	r| j d t|}t|}| jd k	r||d k	r|z| j|}W n tk
r\   Y n X ||jkrxt	d||j |S t	d| ||j
 f}td |d}t|||| S )Nr   zdevice mismatch: %s != %sz1could not load %s, generating random data insteadrh   )r   update_device_or_default_dtype_or_defaultr   Zread_storager   r   r=   r>   itemsizer   r   untyped_storage)r.   r   r   r   r   r   rh   r   r   r   r   r   	  s     

zInputReader.storage)storage_offsetri   r   is_leafc          
   	   K   s   t ||d}t|}t|}t|}t|}tjg ||j|d}	t  |		|||| W 5 Q R X |st
  |	jtjd}	W 5 Q R X t  |		|||| W 5 Q R X tjj|	|ksttj|	| | j|	 |	S )Nr   )ri   r   r   )Zmemory_format)r   _storage_offset_or_defaultr   _is_leaf_or_default_requires_grad_or_defaultre   r   r   Zno_gradset_Zenable_gradcloneZpreserve_format_subclasses
meta_utilssafe_is_leafrc   _utilsZset_tensor_metadatar   append)
r.   r   rh   r   r   ri   r   r   metadatatr   r   r   r     s,       


zInputReader.tensorc                 C   s   | j | |S ry   )r   r   )r.   valr   r   r   r   @  s    zInputReader.symint)N)Nr   r   r   r   r   r     s    !r   c                   @   sT   e Zd ZddddZdd Zddded	d
dZdd	ddZdd	ddZdS )InputWriterFstable_hashc                C   s:   g | _ t | _|| _|d k	r*t||dnd | _i | _d S )Nr   )_lines	itertoolsr   storage_counterr   r   r   seen_storages)r.   r   r   r   r   r   r1   R  s    
zInputWriter.__init__c                 C   s*   dg}| dd | jD  |d |S )Nzdef load_args(reader):c                 s   s   | ]}d | V  qdS )rX   Nr   )r   lr   r   r   r|   b  s     z$InputWriter.lines.<locals>.<genexpr>zload_args._version = 0)extendr   r   )r.   rr   r   r   lines^  s
    
zInputWriter.linesNr   device_hintr   c             
   C   s   t |}| j|}|d k	r |S dt| j }d}td t|krNd|}d}|j}|jdkrr|d k	snt|}t	d |krd|}|
 }	d }
| jd k	r|jjdkr| j|}
| j| d|
d|	| | d || j|< |S )	Nbufr   z, dtype_hint=metaz	, device=z = reader.storage(, r[   )r   r   r   r]   r   r   r   rO   rc   r   r   r   Zwrite_storager   r   )r.   r   r   r   wsvZmaybe_dtype_hintrr   r   r   r   r   r   r   r   m  s0    



zInputWriter.storagec              	   C   s4  | j | |j|jd}g }td |jd| krH|tt	|  t
d |jkrh|d|j td | kr|d|  tj|}|r|dd | D  td |jkr|d|j tjj|}td |kr|d| | jd	d
|tt	|jf| d|   d S )Nr   r   zdtype=zstorage_offset=c                 s   s    | ]\}}| d |V  qdS )=Nr   )r   kr  r   r   r   r|     s     z%InputWriter.tensor.<locals>.<genexpr>zrequires_grad=zis_leaf=zreader.tensor(r   )  # )r   r   ri   r   r   rh   r   r   strtupler   r   r   re   r   Zget_tensor_metadatar   r`   r   r   r   r   r   r   r   r2   )r.   r   r   r   r   Ztensor_metadatar   r   r   r   r     s6      zInputWriter.tensorc                 C   s0   t |tjr|jj}| jd|d|  d S )Nzreader.symint(r  )r   re   ZSymIntr   hintr   r   )r.   r   r   r   r   r   r     s    zInputWriter.symint)	r@   rA   rB   r1   r   r  r   r   r   r   r   r   r   r   Q  s
   r   rz   )funcr   
sym_shapesdefault_sym_shaper   c                    s  ddl m} dd | D }d| }t| }d| d}d| d	}	d
}
G dd d}i }pji  fddtdfdd}| j}| D ]t\}}|dkrqt	
|	|}|r| \}}t|d}|| }|||||< t	
|
|}|r|d||< qdt| jkr~| }||d< t	||D ]>}| \}}}}t|d}|| }t||||| q>|S )a  
    Takes in a function which has been printed with print_readable() and constructs kwargs to run it.

    Handles Tensor inputs, Symints, and a graph module which might have tensor constants.

    Consider a function `forward` defined as follows:

    def forward(self, primals_1: "f32[1001, 6]", primals_2: "f32[s0]", primals_3: "Sym(s0)",):
        _tensor_constant0: "i64[4190]" = self._tensor_constant0
        # Further implementation

    kwargs = aot_graph_input_parser(forward)
    forward(**kwargs)
    r   )dtype_abbrsc                 S   s   i | ]\}}||qS r   r   )r   keyvaluer   r   r   
<dictcomp>  s      z*aot_graph_input_parser.<locals>.<dictcomp>|z(_tensor_constant\d+): \"(z0)\[\s*(.*?)\s*\]\" = self\.(_tensor_constant\d+)(z)\[\s*(.*?)\s*\]zSym\((s\d+)\)c                   @   s   e Zd ZdZdS )z/aot_graph_input_parser.<locals>.TensorContainerz#Container for tensors as attributesN)r@   rA   rB   __doc__r   r   r   r   TensorContainer  s   r  c                    s,   t  kpd k	 fdd  S )Nc                      s
     dS )Nz; not in symbolic_shapes and default sym shape not passed inr   r   r   r   r   r     r   z=aot_graph_input_parser.<locals>.get_sym_int.<locals>.<lambda>)re   _checkr   r  )r
  r	  r  r   get_sym_int  s
    
z+aot_graph_input_parser.<locals>.get_sym_intr   c           
         s   g }g }t | D ]D\}}| }d|krF|}|| || q|t| q|jrbtjntj}||| d}|D ]}	tj	||	 qz|S )Nrx   )ri   r   )
	enumeratestripr   intrf   re   ZrandnZzerosr   Zmark_dynamic)
rh   ri   Zresolved_shapeZdynamic_dimsr{   dimrx   constructorr   r   )r   r  r   r   
gen_tensor  s    
z*aot_graph_input_parser.<locals>.gen_tensorr   ,r   r.   )Ztorch.fx.graphr  r`   r2   valuesinspect	getsourcer
   __annotations__researchgroupsr  r&   group	signaturer^   finditersetattr)r  r   r	  r
  r  Z	dtype_mapZdtype_patternsourceZtensor_assignment_regexZtensor_regexZsym_shape_regexr  r   r  annotationsrq   
annotationmatchZ	data_typeZ	shape_strrh   ri   	container	attr_namerT   r   )r
  r   r  r	  r   aot_graph_input_parser  sD    
r.  )FF)F)F)rz   NN)\r   	functoolsrF   r  r   loggingr$   r!  r}   rD   r4   collectionsr   	importlibr   typingr   r   r   r   r   r	   re   Ztorch._prims_commonZ_prims_commonr   Ztorch._subclasses.meta_utilsr
   Ztorch._dynamo.testingr   r   Z torch.multiprocessing.reductionsr   Ztorch.utils._content_storer   r   r   r   r   r   	getLoggerr@   r=   r   Zinductor_configZ	is_fbcoder   Zlibfb.py.build_infoZlibfbr3   Zextra_importspyZ
build_infoZ	BuildInfoZget_build_ruler*   r6   r2   r<   r   rJ   ra   rL   	lru_cacher   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zfloat32r   r   r   r   r   r   r   r   r   r  r  r.  r   r   r   r   <module>   s    

6	d

 8 *
T[   
