U
    zh                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d d	lmZmZmZmZmZm Z m!Z!m"Z" d d
l#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z-m.Z. er d dl/m0Z0 da1da2de3d< ej45e6dZ7dd Z8dd Z9ddddZ:ddddZ;ej<dkZ=e>e6Z?e@ ZAde3d< ddd d!ZBd"d# ZCzejDeCd$ W n eEk
r   Y nX G d%d& d&ZFejGHd'd(d)ks
ejGHd*d)d)krneFI  dS )+    )annotationsN)FutureProcessPoolExecutorThreadPoolExecutor)partial)time)AnyCallableDictListOptionalSetTYPE_CHECKING) get_registered_device_interfaces)config)CodeCacheFutureCppCodeCacheCppPythonBindingsCodeCacheCUDACodeCacheHalideCodeCacheLambdaFutureTritonCodeCacheTritonFuture)_warm_process_poolAnyPoolSubprocPool)_async_compile_initializer)_set_triton_ptxas_path_worker_compile_triton)	_Faketqdmtqdm)
HalideMetag        zOptional[float]_t0Zkernel_codec                  C  s6   t   zddlm}  |   W n tk
r0   Y nX dS )zG
    Setup that must be done prior to forking with a process pool.
    r   
triton_keyN)caching_device_propertiesZtriton.compiler.compilerr$   ModuleNotFoundErrorr#    r'   O/var/www/html/venv/lib/python3.8/site-packages/torch/_inductor/async_compile.pypre_fork_setup3   s    
r)   c                  C  s&   t  D ]\} }| r|j  qd S N)r   Zis_availableZWorkerZget_device_properties)_Zdevice_interfacer'   r'   r(   r%   F   s    r%   Nonereturnc                   C  s   t d krt a d S r*   )r"   r   r'   r'   r'   r(   _compile_startL   s    r/   c                  C  s"   t d k	rt } t| t  7 ad a d S r*   )r"   r   _cumulative_compile_time)t1r'   r'   r(   _compile_endR   s    r2   win32zSet[AnyPool]	_pool_setc                  C  s   t D ]} |   qt  dS )z/Shut down all outstanding compile-worker pools.N)r4   shutdown
after_fork)poolr'   r'   r(   shutdown_compile_workersd   s    
r8   c                   C  s   t   tj  dS )z7Reset pools to initial state without shutting them downN)r4   clearAsyncCompileprocess_poolcache_clearr'   r'   r'   r(   r6   k   s    r6   )after_in_childc                   @  s   e Zd ZddddZeedddddZeedd	dd
dZe	ddddZ
e	dddddZd,ddddddZddddZddddZddddd Zd!d" Zd#dd$d%d&Zd'dd(d)d*Zd+S )-r:   r,   r-   c                 C  s   d S r*   r'   )selfr'   r'   r(   __init__x   s    zAsyncCompile.__init__   r   c                   C  s   t jdksttt jS Nr@   )r   compile_threadsAssertionErrorr   r'   r'   r'   r(   r7   {   s    zAsyncCompile.poolr   c                  C  st   t jdkstt jdkr$tt j} nBt  tt j}tt j|t	t
t d} tjjd | jtjd t|  | S )Nr@   
subprocess)Z
mp_contextZinitializer)Zexitpriority)r   rB   rC   Zworker_start_methodr   r)   multiprocessingZget_contextr   r   r   osgetpidutilZFinalizer5   sysmaxsizer4   add)r7   ctxr'   r'   r(   r;      s    
	
zAsyncCompile.process_poolc                 C  s.   t jdkrd S t  t|  t j t  d S rA   )r   rB   r/   r   r;   r2   )clsr'   r'   r(   	warm_pool   s
    
zAsyncCompile.warm_poolzCallable[..., Any]r   )taskr.   c                 C  s   t jdkr| S |  |S rA   )r   rB   r7   submit)rM   rO   r'   r'   r(   rP      s    
zAsyncCompile.submitcudastr)kernel_namesource_code
device_strc                 C  sV   t d| t  t  t||}tjdkrFt|| 	 
t|jS |  |S d S )NzTriton Kernel:
%sr@   )kernel_code_loginfor/   r   r   loadr   rB   r   r;   rP   r   Z_reload_in_subprocZ
precompile)r>   rS   rT   rU   kernelr'   r'   r(   triton   s    
zAsyncCompile.tritonc                 O  s   ddl m} |||S )Nr   )MultiKernelCall)Z$torch._inductor.codegen.multi_kernelr[   )r>   argskwargsr[   r'   r'   r(   multi_kernel   s    zAsyncCompile.multi_kernel)rT   c                   sF   t d| tjdkr"t|jS tj|| jd t	 fddS d S )NzCPP Kernel:
%sr@   Z	submit_fnc                     s     j S r*   )rY   r'   
get_resultr'   r(   <lambda>       z"AsyncCompile.cpp.<locals>.<lambda>)
rV   rW   r   rB   r   rX   rY   Z
load_asyncrP   r   )r>   rT   r'   r`   r(   cpp   s
    
zAsyncCompile.cppz	List[str])argtypesrT   c                 C  s@   t d| tjdkr"t||S tj||| jd}t|S d S )NzCPP+Bindings Kernel:
%sr@   r_   )	rV   rW   r   rB   r   Zload_pybindingZload_pybinding_asyncrP   r   )r>   re   rT   ra   r'   r'   r(   cpp_pybinding   s    
  zAsyncCompile.cpp_pybindingc                   s$   t d  fdd}| |S )NzCUDA Kernel:
%sc                     s   t  d S )Nr   )r   rX   r'   dst_file_extrT   r'   r(   rO      s    zAsyncCompile.cuda.<locals>.task)rV   rW   rP   )r>   rT   rh   rO   r'   rg   r(   rQ      s    zAsyncCompile.cudar!   )metarT   c                 C  sB   t d|| tjdkr$t||S tj||| jd}t|S d S )NzHalide Kernel:
%r
%sr@   r_   )	rV   rW   r   rB   r   Zgenerate_halideZgenerate_halide_asyncrP   r   )r>   ri   rT   ra   r'   r'   r(   halide   s    
  zAsyncCompile.halidezDict[str, Any])scoper.   c                 C  s   t dd | D }t|dtjdd}tjdkr| D ]F\}}tjr\t|ts\|	| t|t
tfr:| ||< |d q:t  d S )Nc                 S  s"   g | ]\}}t |ttfr|qS r'   )
isinstancer   r   ).0keyvaluer'   r'   r(   
<listcomp>   s   z%AsyncCompile.wait.<locals>.<listcomp>zInductor Compilationr   )totaldescdisabledelayr@   )lenitemsr    r   Zdisable_progressrB   Zverbose_progressrl   r   Zset_postfix_strr   r   resultupdater2   )r>   rk   Znum_kernelsZpbarrn   rw   r'   r'   r(   wait   s&    

zAsyncCompile.waitN)rQ   )__name__
__module____qualname__r?   staticmethod	functools	lru_cacher7   r;   classmethodrN   rP   rZ   r^   rd   rf   rQ   rj   ry   r'   r'   r'   r(   r:   w   s$   

r:   ZTORCH_TNT_IN_USE01ZTORCH_WARM_POOL)J
__future__r   r~   loggingrE   rF   rI   concurrent.futuresr   r   r   r   r   typingr   r	   r
   r   r   r   r   ZtorchZtorch._dynamo.device_interfacer   Ztorch._inductorr   Ztorch._inductor.codecacher   r   r   r   r   r   r   r   Z+torch._inductor.compile_worker.subproc_poolr   r   r   Z'torch._inductor.compile_worker.watchdogr   Z%torch._inductor.runtime.compile_tasksr   r   Z	torch.hubr   r    Ztorch._inductor.runtime.hintsr!   r0   r"   __annotations__Z_loggingZgetArtifactLoggerrz   rV   r)   r%   r/   r2   platformZ_IS_WINDOWS	getLoggerlogsetr4   r8   r6   register_at_forkAttributeErrorr:   environgetrN   r'   r'   r'   r(   <module>   sX    $(
	

 