U
    zh1                     @  s6  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZ d dlmZ d dlmZ erd dlmZmZmZmZ d ad ad ag ad	ed
< g Zded< d a d a!g a"ded< d a#dd Z$eG dd dZ%dd Z&G dd dZ'i Z(ded< eG dd dZ)e)*dddddd d!d"g e)*d#d$d%d&g e)*d'd(d)ddd*d+d,g e)*d-d.d/d0d1d2d3d4d5g e)*d6d7d8d9d*d+d:d;d<d=d>d?d@dAdBdCg dDdE Z+dFdG Z,dHdI Z-dJdK Z.dLdM Z/dNdO Z0dPdQ Z1dRdS Z2dTdU Z3dVdW Z4dXdY Z5e	dZd[d\d]Z6d^d_ Z7d`da Z8dS )b    )annotationsN)	dataclass)	lru_cache)DictListSetTupleTYPE_CHECKINGUnion)config)get_benchmark_name)BaseSchedulerNodeExternKernelSchedulerNodeNopKernelSchedulerNodeSchedulerNodezYList[Tuple[Union[NopKernelSchedulerNode, SchedulerNode, ExternKernelSchedulerNode], int]]nodes_num_elemz%List[Tuple[BaseSchedulerNode, float]]node_runtimesz	List[int]!cpp_outer_loop_fused_inner_countsc                   C  s4   da dadat  t  dadat  da	d S )Nr   )
generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding r   r   I/var/www/html/venv/lib/python3.8/site-packages/torch/_inductor/metrics.pyreset4   s    	r   c                   @  s:   e Zd ZU dZded< ded< ded< ded< ded< dS )	CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    intr   r   r   r   r   N)__name__
__module____qualname____doc____annotations__r   r   r   r   r   H   s   
r   c                   C  s   dd t tD S )Nc                 S  s   g | ]
}|j qS r   name).0fieldr   r   r   
<listcomp>W   s     z%get_metric_fields.<locals>.<listcomp>)dataclassesfieldsr   r   r   r   r   get_metric_fieldsV   s    r,   c                   @  s8   e Zd ZdZdd ZddddZeddd	d
ZdS )CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    c                 C  s&   i | _ t D ]}t | | j |< qd S N)cached_metricsr,   globals)selfmetricr   r   r   __init__a   s    
zCachedMetricsHelper.__init__r   returnc                 C  s2   i }t  D ]}t | | j|  ||< q
tf |S r.   )r,   r0   r/   r   )r1   Zdelta_metricsr2   r   r   r   
get_deltasf   s    
zCachedMetricsHelper.get_deltas)deltac                 C  s(   t  D ]}t |  t| |7  < qd S r.   )r,   r0   getattr)r7   r2   r   r   r   apply_deltasm   s    
z CachedMetricsHelper.apply_deltasN)r    r!   r"   r#   r3   r6   staticmethodr9   r   r   r   r   r-   Z   s
   r-   zDict[str, MetricTable]REGISTERED_METRIC_TABLESc                   @  sV   e Zd ZU ded< ded< dZded< dd	 Zd
d Zdd Zdd Ze	dd Z
dS )MetricTablestr
table_namez	List[str]column_namesr   r   num_rows_addedc                   s   | j t krd S |  t| jt ksDtt| j dt  t| jt  ksztt| j dt   t g}| fdd| jD 7 }| | d S )Nz v.s. c                   s   g | ]} | qS r   r   )r'   Zcolumn_nameZrow_dictr   r   r)      s     z'MetricTable.add_row.<locals>.<listcomp>)	r>   enabled_metric_tableslenr?   AssertionErrorsetkeysr   
_write_row)r1   Zrow_fnrowr   rA   r   add_row}   s"    

zMetricTable.add_rowc                 C  s   d| j  dS )NZmetric_table_z.csv)r>   )r1   r   r   r   output_filename   s    zMetricTable.output_filenamec              	   C  sB   |   }t|d&}tj|dd}|dg| j  W 5 Q R X d S )Nw
lineterminatorZ
model_name)rJ   opencsvwriterwriterowr?   )r1   filenamefdrQ   r   r   r   write_header   s    zMetricTable.write_headerc              	   C  s   |   }| jdkr&tj|s&|   |  jd7  _t|D ]6\}}t|trX|d}n|d krfd}n|}|||< q<t	|d}t
j|dd}|| W 5 Q R X d S )Nr      z.6f arL   rM   )rJ   r@   ospathexistsrU   	enumerate
isinstancefloatrO   rP   rQ   rR   )r1   rH   rS   idxZorig_valnew_valrT   rQ   r   r   r   rG      s    


zMetricTable._write_rowc                 C  s   t | |}|t| < d S r.   )r<   r;   )r&   r?   tabler   r   r   register_table   s    
zMetricTable.register_tableN)r    r!   r"   r$   r@   rI   rJ   rU   rG   r:   rb   r   r   r   r   r<   v   s   
r<   Zslow_fusionZkernel1_pathZkernel1_latencyZkernel2_pathZkernel2_latencyZfused_kernel_pathZfused_kernel_latencyZslow_down_ratioZgraph_statsZgraph_idZnum_nodes_before_fusionZnum_nodes_after_fusionZpersistent_red_perfZkernel1_nameZkernel2_name
size_hintsreduction_hintZspeedupZ'fusion_failure_due_to_indexing_mismatchZpre_grad_graph_idZpost_grad_graph_idZ
node1_nameZ
node2_nameZnode1_debug_strZnode2_debug_strZcommon_buffer_namesZfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryline_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                 C  s8   ddl m} ddlm} || }||}t|jjS )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    rV   )PyCodeCache)get_triton_kernel)Z	codecachers   wrapper_benchmarkrt   loadinspect	getsourcefn)kernel_module_coders   rt   modZkernelr   r   r   _parse_kernel_fn_code  s
    
r|   c                 C  s   t |  S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rC   
splitlines)proper_kernel_fn_coder   r   r   _parse_kernel_line_of_code  s    r   c                 C  s.   |dkrd S t d| }|s$td|dS )NZforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!rV   researchrD   group)rz   rh   mr   r   r   _parse_size_hints  s
    r   c                 C  s.   | dkrd S t d|}|s$td|dS )N)Z	reductionZpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!rV   r   )rh   rz   r   r   r   r   _parse_reduction_hint#  s
    r   c                 C  s
   |  |S r.   )count)r~   patternr   r   r   _count_pattern+  s    r   c                 C  sP   |   d }|dst|d}|d}||d | }|d}t|S )Nr   def (z):rV   ,)r}   
startswithrD   indexsplitrC   )r~   Zdef_lineZ	start_idxZend_idxZdecl_csvcompsr   r   r   _count_args/  s    


r   c                 C  s   |  d}| |d S )z
    Skip decorators.
    r   N)r   )kernel_fn_code	start_posr   r   r   _parse_proper_kernel_fn_code9  s    
r   c                 C  s,   t | d| }|r$t|dS d S d S )Nz
 = ([\d]+)rV   )r   r   r   r   )r~   Znumel_arg_namer   r   r   r   _parse_numelA  s    r   c                 C  s&   t d| }|rt|dS dS dS )z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)rV   N)r   r   r^   r   )r   rh   r   r   r   r   _parse_kernel_args_num_gbI  s    	r   c              
     sh   ddl m} || t |t| t|tttd fdd dS )z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    rV   )"get_kernel_category_by_source_codere   c                     sX    t dt dt dt dttdtdtdt dS )	Nztl.loadztl.storezfor ztl.atomic_addro   rp   rq   )rf   rg   rh   rc   rd   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   )r   r   r   r   r   rh   r   Zkernel_line_of_coderf   rg   r~   rd   rc   r   r   <lambda>p  s&     z%log_kernel_metadata.<locals>.<lambda>N)	ru   r   r   r   r|   r   r   get_metric_tablerI   )rf   rg   rz   r   r   r   r   log_kernel_metadata\  s    

r   c                  C  sF   t  D ]8\} }| t kr| }tj|r8t| |  qdS )z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	r;   itemsrB   rJ   rY   rZ   r[   unlinkrU   )r&   ra   rS   r   r   r   purge_old_log_files  s    

r   zSet[str]r4   c                  C  sP   t j} t }| dD ]4}| }|s(q|tks@td| d|| q|S )Nr   zMetric table name z is not registered)r   rB   rE   r   stripr;   rD   add)Z
config_strZenabledr&   r   r   r   rB     s    
rB   c                 C  s
   | t  kS r.   )rB   r%   r   r   r   is_metric_table_enabled  s    r   c                 C  s    | t kstd|  dt |  S )NzMetric table z is not defined)r;   rD   r%   r   r   r   r     s    r   )9
__future__r   rP   r*   rw   rY   r   r   	functoolsr   typingr   r   r   r   r	   r
   Ztorch._inductorr   Ztorch._inductor.utilsr   Ztorch._inductor.schedulerr   r   r   r   r   r   r   r   r$   r   r   r   r   r   r   r   r,   r-   r;   r<   rb   r|   r   r   r   r   r   r   r   r   r   r   rB   r   r   r   r   r   r   <module>   s     
;	
*