U
    h                     @  s   d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ ddlmZ d dlmZ eded	  Zed
Z G dd dej!Z"ddddZ#G dd dZ$dd Z%i Z&dHddZ'G dd dee  Z(dd Z)dd Z*ddd d!dd"d"d d#d#d$d%d&d'd(d)d*d+d,d-d.d/d0Z+e,e+- D ]Z.e.e+e.< qhG d1d2 d2e(e  Z/ed
d3d4d5d6Z0eddddddd7d8d8d9d:d:d;d<d=d6Z0dIddddddd7d>d8d8d9d:d:d?d@dAd6Z0G dBdC dCZ1G dDdE dEZ2dFdG Z3dS )J    )annotationsdivisionN)defaultdict)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTuple   )driver)
ModuleTypez.runtime.jitTc                      s   e Zd ZdZdd fddZedd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Z  ZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    Nonereturnc              
     sR   t    || _t|d| _|| _dddddddd	d
dh
| _i | _	d| _
d S )Nutf-8floatgetattrint
isinstancelenlistmaxminprintrangeF)super__init__namehashlibsha256encodehasherglobalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr%   r*   src	__class__ D/var/www/html/venv/lib/python3.8/site-packages/triton/runtime/jit.pyr$   $   s"    
zDependenciesFinder.__init__c                 C  s
   | j  S N)r)   	hexdigestr.   r2   r2   r3   retH   s    zDependenciesFinder.retc                 C  s   t |jtjkr|jS |j| jkr&d S | j|jd }|d k	r| jst |t	krt
|tst|dds|j| jkr|| jf| j|jt| jf< |S )NZ__triton_builtin__F)typectxastStoreidlocal_namesr*   getr-   r   r   JITFunctionr   r+   r,   )r.   nodevalr2   r2   r3   
visit_NameL   s$    




zDependenciesFinder.visit_Namec                   s    fdd|j D S )Nc                   s   g | ]}  |qS r2   )visit).0eltr6   r2   r3   
<listcomp>m   s     z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>)eltsr.   r@   r2   r6   r3   visit_Tuplej   s    zDependenciesFinder.visit_Tuplec                 C  sN   |  |j}t|tjr&|  |j}q|d ks>t|ddtkrBd S t||jS )N__name__ )rC   valuer   r:   	Attributer   TRITON_MODULEattr)r.   r@   lhsr2   r2   r3   visit_Attributeo   s    z"DependenciesFinder.visit_Attributec                   s>   fdd}  j}|d ksD||sDt|tsDtd|j dt|ftj  j	fdd j
D D ]}t|ts~qn||rqn|j}j |j @ D ]\}|\}}j| \}	}|j| \}
}|	|
krtd| d|	 d	j d
|j d|
 dqj|j tt|dd}|| }j|d qnd S )Nc                   s&   t  jrdS t| dd}|tS )NT
__module__rK   )inspect	isbuiltinfuncr   
startswithrN   )rU   module)r@   r2   r3   is_triton_builtiny   s    z8DependenciesFinder.visit_Call.<locals>.is_triton_builtinz
Function "zv" is being called from a Triton function but is not a Triton function itself. Decorate it with @triton.jit to fix thisc                 3  s   | ]}  |jV  qd S r4   )rC   rL   )rD   kwr6   r2   r3   	<genexpr>   s     z0DependenciesFinder.visit_Call.<locals>.<genexpr>Global variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )rC   rU   r   r?   AssertionErrorrJ   	itertoolschainmapargskeywords	cache_keyr,   keysRuntimeErrorr%   updatestrr   r)   r(   )r.   r@   rX   rU   objZfunc_cache_keykvar_name_Zv1Zv2r\   keyr2   )r@   r.   r3   
visit_Callw   s<     
&zDependenciesFinder.visit_Callc                 C  s"   dd |j j D | _| | d S )Nc                 S  s   h | ]
}|j qS r2   arg)rD   ro   r2   r2   r3   	<setcomp>   s     z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>)ra   r=   generic_visitrH   r2   r2   r3   visit_FunctionDef   s    z$DependenciesFinder.visit_FunctionDefc                   sn    fdd}t |j|j|jr&|jgng |jD ]} | q0||j |jd k	r` |j ||j	 d S )Nc                   s>   z0 j rtd _ | D ]}|d k	r | qW 5 d _ X d S )NFT)r-   r]   rC   )defaultsexprr6   r2   r3   visit_defaults   s    
z:DependenciesFinder.visit_arguments.<locals>.visit_defaults)
r^   r_   posonlyargsra   vararg
kwonlyargsrC   kw_defaultskwargrs   )r.   r@   ru   ro   r2   r6   r3   visit_arguments   s    
(

z"DependenciesFinder.visit_argumentsc                 C  s8   |  |}t|tr(|  jt|O  _n| j| d S r4   )rC   r   r   r=   setadd)r.   r@   targetr2   r2   r3   visitAssnTarget   s    

z"DependenciesFinder.visitAssnTargetc                 C  s4   t |jdkrtd| |jd  | | d S )N   z2Simultaneous multiple assignment is not supported.r   )r   targets	TypeErrorr   rq   rH   r2   r2   r3   visit_Assign   s    zDependenciesFinder.visit_Assignc                 C  s   |  |j | | d S r4   r   r~   rq   rH   r2   r2   r3   visit_AnnAssign   s    z"DependenciesFinder.visit_AnnAssignc                 C  s   |  |j | | d S r4   r   rH   r2   r2   r3   	visit_For   s    zDependenciesFinder.visit_For)rJ   rR   __qualname____doc__r$   propertyr7   rB   rI   rQ   rm   rr   r{   r   r   r   r   __classcell__r2   r2   r0   r3   r      s   $
- 	r   rg   r   c                 C  s&   t | tr| jS t | tr| S t| S r4   )r   r8   rJ   rg   repr)tyr2   r2   r3   _normalize_ty   s
    

r   c                   @  sv   e Zd ZdZddddddZedd	 Zed
d Zedd Zedd Z	edd Z
edd Zedd ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.r   zinspect.Parameterbool)numparamdo_not_specializec                 C  s   || _ || _|| _d S r4   )r   _paramr   )r.   r   r   r   r2   r2   r3   r$      s    zKernelParam.__init__c                 C  s   | j jS r4   )r   r%   r6   r2   r2   r3   r%     s    zKernelParam.namec                 C  s(   | j jr| j jtjjkrdS t| j jS )NrK   )r   
annotationrS   	Parameteremptyr   r6   r2   r2   r3   r     s    zKernelParam.annotationc                 C  sZ   | j }dD ]>\}}|||t| d  }|r
||kr
| |   S q
|dkrVdS dS )N))Zuintu)r   ir   u1rK   )r   findr   )r.   r   Zty1Zty2widthr2   r2   r3   annotation_type  s    zKernelParam.annotation_typec                 C  s
   d| j kS )NZ	constexpr)r   r6   r2   r2   r3   is_constexpr  s    zKernelParam.is_constexprc                 C  s   d| j ko| j S )Nconst)r   r   r6   r2   r2   r3   is_const  s    zKernelParam.is_constc                 C  s   | j jS r4   )r   defaultr6   r2   r2   r3   r     s    zKernelParam.defaultc                 C  s   | j jtjjkS r4   )r   r   rS   r   r   r6   r2   r2   r3   has_default"  s    zKernelParam.has_defaultN)rJ   rR   r   r   r$   r   r%   r   r   r   r   r   r   r   r2   r2   r2   r3   r      s    






r   c                 C  sH   t | dr|  d dkrdS t| trD| d dkr8dS | dkrDdS dS )Ndata_ptr   r   Dr   1N)hasattrr   r   r   )vr2   r2   r3   compute_spec_key'  s    
r   Fc                 C  s   | d krdS t | trdS t | trRd| kr8| dkr8dS d| krL| dkrLdS d	S nbt | tr`d
S | j|f}t|d }|d kr|d rdndtt|d 	dd   }|t|< |S d S )Nnonei1   i32                u64i64fp32r   *k*r   .)
r   r   r   r   dtype	dtype2strr>   type_canonicalisation_dictrg   split)ro   r   Zdskresr2   r2   r3   mangle_type7  s$    



*r   c                   @  s$   e Zd ZU ded< ddddZdS )KernelInterfacer   runr   c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFgridwarmup)r   )ra   kwargsr   r.   r2   r3   <lambda>Y      z-KernelInterface.__getitem__.<locals>.<lambda>r2   )r.   r   r2   r   r3   __getitem__S  s    zKernelInterface.__getitem__N)rJ   rR   r   __annotations__r   r2   r2   r2   r3   r   P  s   
r   c           	      C  s@   dd |  D }dd l}| ||| |j|d}||}|S )Nc                 S  s*   i | ]"\}}||j jd kr"t|n|qS r   )r1   rJ   rg   rD   rl   rL   r2   r2   r3   
<dictcomp>^  s      z1serialize_specialization_data.<locals>.<dictcomp>r   )r%   	signature	constantsattrsoptionsrl   )itemsjsonto_dict__dict__dumps)	r%   r   r   r   r   rl   r   rh   Zserialized_objr2   r2   r3   serialize_specialization_data]  s        
r   c                 C  s  t | jt |kstg }g }g }g }g }g }t| j |D ]\\}}	}
|	jtjjkrz|	| |	d| d|  n*|	| d|  |	d| d|  |
j
r|	| q>|	| |
js|	d|  |
jr|	d|
j  q>|	d||
jrdndf  q>d	d
d || D }d	dd |D }d	dd |D }|	d d|}d|}d|||||f }dd | j D }t|d< t|d< t|| |d S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    'z': z	=default_zcompute_spec_key(%s)z"%s"zmangle_type(%s, %s)TrueFalserK   c                 S  s   g | ]}|d  qS , r2   rD   xr2   r2   r3   rF     s     z2create_function_from_signature.<locals>.<listcomp>c                 S  s   g | ]}|d  qS r   r2   r   r2   r2   r3   rF     s     c                 S  s   g | ]}|d  qS r   r2   r   r2   r2   r3   rF     s     z**excess_kwargsr   zFdef dynamic_func(%s):
    return {%s}, (%s), (%s), (%s), excess_kwargsc                 S  s,   i | ]$\}}|j tjjk	rd | |j qS )Zdefault_)r   rS   r   r   )rD   r%   r   r2   r2   r3   r     s    z2create_function_from_signature.<locals>.<dictcomp>r   r   Zdynamic_func)r   
parametersr]   zipr   r   rS   r   r   appendr   r   r   r   joinr   r   exec)sigZkparamsZ	func_argsZdict_entriesconstexpr_valsnon_constexpr_valsZsignature_typesZspecialisationsr%   spkprc   args_strZdict_strZ	func_bodyZfunc_namespacer2   r2   r3   create_function_from_signatureh  sR    




    
r   r   Zfp8e4nvZfp8e5Zfp8e4b15Zfp8e4b8Zfp8e5b16Zfp16Zbf16r   Zfp64i8Zi16r   r   u8u16u32r   )r   Z
float8e4nvZfloat8e5Zfloat8e4b15Zfloat8_e4m3fnZ
float8e4b8Zfloat8_e4m3fnuzZfloat8_e5m2Zfloat8e5b16Zfloat8_e5m2fnuzZfloat16Zbfloat16Zfloat32Zfloat64Zint8Zint16Zint32Zint64Zuint8Zuint16Zuint32Zuint64c                      s   e Zd ZdZdZedd Zedd Zdd Zed&d
dZ	dd Z
dd Zdd Zdd Zdd Zd'ddZedd Zdd Zdd Zdd Zd d! Z fd"d#Zd$d% Z  ZS )(r?   Nr   c                 C  s   t | dr| jS t| trdS t| trVd| kr<| dkr<dS d| krP| dkrPdS d	S n2t| trdd
S | d krpd S tdt|  d|  d S )Nr   r   r   r   r   r   r   r   r   r   zUnsupported type z for )r   r   r   r   r   r   r   r8   rn   r2   r2   r3   _key_of  s    



zJITFunction._key_ofc                 C  sD   t | dr|  tj dkS t| tr:| d dk| dkfS | d kfS )Nr   r   r   r   r   r   r?   divisibilityr   r   rn   r2   r2   r3   _spec_of  s
    

zJITFunction._spec_ofc                   sV   ddl m} dd   fddt| j|D }dd t| j|D }|t|t|S )Nr   )AttrsDescriptorc                 S  sD   t | dr|  tj dkS t| tr4| tj dkS | d kr@dS dS )Nr   r   TFr   )r   r2   r2   r3   is_divisible_by_16  s    

z3JITFunction._get_config.<locals>.is_divisible_by_16c                   s$   h | ]\}} |r|j s|jqS r2   )r   r   rD   r   ro   r   r2   r3   rp     s    z*JITFunction._get_config.<locals>.<setcomp>c                 S  s8   h | ]0\}}t |trt |ts|d kr|js|jqS )r   )r   r   r   r   r   r   r2   r2   r3   rp     s   
 
  )compilerr   r   paramstuple)r.   ra   r   Zdivisible_by_16
equal_to_1r2   r   r3   _get_config  s    	


zJITFunction._get_configFc                 C  sH   | d krdS t | tr| S t| dd }t| }|r<dnd}|| S )N*i8r   r   r   r   )r   rg   r   r   )rl   r   Z	dtype_strZ	const_strr2   r2   r3   _type_of  s    
zJITFunction._type_ofc                 C  s   t t| j|}|S r4   )dictr   
constexprs)r.   Zconstexpr_keyr   r2   r2   r3   _make_constants  s    zJITFunction._make_constantsc                 C  s   t jd krdS | jj}| jj}ddd t| j|d D }	| d|j d|j	 d|j
 d	|j d
|	 d}
G dd d}t||||d ||}||||j|j	|j
|j|j||d
}t j||
|||| d|i|dddS )NFr   c                 S  s    g | ]\}}|j  d | qS )z: r%   )rD   r   r   r2   r2   r3   rF   "  s     z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=]()c                   @  s   e Zd Zdd ZdS )z/JITFunction._call_hook.<locals>.JitFunctionInfoc                 S  s   || _ || _|| _d S r4   )rW   r%   jit_function)r.   rW   r%   r  r2   r2   r3   r$   '  s    z8JITFunction._call_hook.<locals>.JitFunctionInfo.__init__N)rJ   rR   r   r$   r2   r2   r2   r3   JitFunctionInfo%  s   r  r   )
r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionextern_libsconfigsspecialization_datarl   )rl   r   fncompileZis_manual_warmupZalready_compiled)r?   
cache_hookr  rJ   rR   r   r   r   r  r  r  r  r   r	  )r.   rl   r   r  r   r   r
  r%   rW   Z	arg_reprsr   r  r  r   r2   r2   r3   
_call_hook  s6    	
 0

zJITFunction._call_hookc                 C  s   t |st| j| dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callabler]   pre_run_hooksr   )r.   hookr2   r2   r3   add_pre_run_hookE  s    zJITFunction.add_pre_run_hookc                 C  s   ddl m}m}m}m} || _|| _|| _|| _t| j| j| _dd t	| jD | _
dd t	| jD | _dd t	| jD | _dS )z1
        Precompute as much as possible.
        r   )CompiledKernelr  	ASTSourcemake_backendc                 S  s   g | ]\}}|j r|qS r2   r   rD   r   pr2   r2   r3   rF   W  s      z-JITFunction.create_binder.<locals>.<listcomp>c                 S  s   g | ]\}}|j s|qS r2   r  r  r2   r2   r3   rF   X  s      c                 S  s    g | ]\}}|j s|js|qS r2   )r   r   r  r2   r2   r3   rF   Y  s      N)r   r  r  r  r  r   r   r   binder	enumerateZconstexpr_indicesnon_constexpr_indicesZspecialised_indices)r.   r  r  r  r  r2   r2   r3   create_binderM  s    zJITFunction.create_binderc          &   
     s  t j }t j|}j|d< jD ]}||| q&jd krH  j||\}}	}
}}d|	t	|
|f }j
| |d }|d krt j }|}||}d|kstdd|kstdd|kstd|D ]}||jkrtd	| qt| }fd
djD }|	d t| }dd t||D }j| f  fddt|jD }| D ]$\}}t|rptd| dqp||||| rd S || d }j|||jd}|j
| |< t }j  D ]B\\}}\}}||| } |krt!d| d| d|  q|s|d k	sLtt|r^||}t|}!|d }"|!dkr|d nd}#|!dkr|d nd}$|j"||f| }%|j#|"|#|$||j$|j%|%j&j'j&j(f	|  |S )NdebugrK   Zdevice_typez=device_type option is deprecated; current target will be usedr  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                   s   g | ]} j | jqS r2   )r   r%   )rD   r   r6   r2   r3   rF     s     z#JITFunction.run.<locals>.<listcomp>c                 S  s"   i | ]\}}||d krdn|qS )r   r   r2   )rD   ri   r   r2   r2   r3   r     s      z#JITFunction.run.<locals>.<dictcomp>c                   s6   i | ].\}}|j s*|j d  jks*|dkr|j|qS )r   N)r   r   r   r%   )rD   r   r  )r
  r2   r3   r     s      zCallable constexpr at index z is not supportedr   )r~   r   r[   z1 has changed since we compiled this kernel, from z to r   r   ))r   activeget_current_deviceZget_current_streamr  r  r  r  r   rg   cacher>   Zget_current_targetr  Zparse_optionsr]   r   KeyErrorr   valuesr  r   r   r   r   r   r  r   r  r  r  objectr,   re   launch_metadatar   functionZpacked_metadatar  Zlaunch_enter_hookZlaunch_exit_hook)&r.   r   r   ra   r   r  r  r  Z
bound_argsZsig_and_specr   r   Zexcess_kwargsrl   kernelr~   backendr   ri   Z
bound_valsZsigkeysZsigvalsr   r   r   ro   r/   Znot_presentr%   Zglobals_dict_idrA   Zglobals_dictZnewValZ	grid_sizeZgrid_0Zgrid_1Zgrid_2r&  r2   )r
  r.   r3   r   ]  s|    












 zJITFunction.runc                   sj  |r|ng } | _  j| _|| _t | _|| _t d | _ fdd| _	|| _
d | _g | _t| jj D ]2\}}	|o||kp|	j|k}
| jt||	|
 qttt | _| jtd| jtj d  | _tt| _d | _i | _d | _ t!j"#dddkrdn|| _$|| _%d	d
 | jD | _&dd
 | jD | _'g | _( j)| _) j*| _* j+| _+ j| _d S )Nr   c                   s   d kr j S | S r4   )rJ   )rk   r  r   r2   r3   r     r   z&JITFunction.__init__.<locals>.<lambda>z^def\s+\w+\s*\(ZTRITON_DEBUG0r   Tc                 S  s   g | ]
}|j qS r2   r   rD   r  r2   r2   r3   rF     s     z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS r2   )r   r   r,  r2   r2   r3   rF     s      ),r  rR   rW   versionrS   r   r   getsourcelinesstarting_line_numberr   r&  r  r   r  r   r$  r%   r   r   textwrapdedent	getsourcer/   research	MULTILINEstartr   r   r"  hashr,   r(  osenvironr>   r  r\   	arg_namesr   r  r   rJ   __globals__)r.   r  r-  r   r  r\   r   r&  r   r   Zdnsr2   r*  r3   r$     s:    "
zJITFunction.__init__c                 C  sX   | j d krRt| j| j| jd}||   |jt| j	 | _ t
t|j | _| j S )N)r%   r*   r/   )r7  r   rJ   r;  r/   rC   parser7   rg   r/  r   sortedr,   r   )r.   Zdependencies_finderr2   r2   r3   rc     s    
zJITFunction.cache_keyc                O  s   | j ttj||dd|S )NTr   )r   r`   
MockTensor
wrap_dtype)r.   r   ra   r   r2   r2   r3   r     s    zJITFunction.warmupc                   s   ddl m}m}m} dd l}dd lm  tj	 }|
|}|d | jjkrhtd|d  d| jj  fdd|d	  D }t|d
  }	|| |	|||d }
dd |d  D }|d }||
d |}|| j| |< |S )Nr   )r   r  r  r   r%   zSpecialization data is for z but trying to preload for c                   s,   i | ]$\}}| j |r$  |n|qS r2   )r   Zis_dtyper   tlr2   r3   r     s    z'JITFunction.preload.<locals>.<dictcomp>r   r   r   c                 S  s(   i | ] \}}|t |tr t|n|qS r2   )r   r   r   r   r2   r2   r3   r     s    r   rl   )r   r   r  r  r   Ztriton.languagelanguager   r   r!  loadsr  rJ   re   r   r   	from_dictr"  )r.   r  r   r  r  r   r  Zdeserialized_objr   r   r/   r   rl   r(  r2   r@  r3   preload  s*    




zJITFunction.preloadc                 C  sH   t | j}t|t jstt|jdks.tt|jd t jsDt|S )Nr   r   )	r:   r<  r/   r   Moduler]   r   bodyFunctionDef)r.   treer2   r2   r3   r<    s
    zJITFunction.parsec                 O  s   t dd S )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)re   )r.   ra   r   r2   r2   r3   __call__   s    zJITFunction.__call__c                   s$   t t| || |dkr d | _d S )Nr/   )r#   r?   __setattr__r7  )r.   r%   rL   r0   r2   r3   rK  #  s    zJITFunction.__setattr__c                 C  s   d| j  d| jj dS )NzJITFunction(:r  )rW   r  rJ   r6   r2   r2   r3   __repr__*  s    zJITFunction.__repr__)F)NNNNNN)rJ   rR   r   r  r   staticmethodr   r   r   r   r   r  r  r  r   r$   r   rc   r   rE  r<  rJ  rK  rM  r   r2   r2   r0   r3   r?     s2   

1Z  
:
	r?   JITFunction[T]r  r   c                 C  s   d S r4   r2   )r  r2   r2   r3   jit3  s    rQ  r-  r   r&  r   r  r\   zOptional[Callable]zOptional[Iterable[int]]zOptional[bool]zCallable[[T], JITFunction[T]])r   r&  r   r  r\   r   c                 C  s   d S r4   r2   rR  r2   r2   r3   rQ  8  s    
zOptional[T]z4Union[JITFunction[T], Callable[[T], JITFunction[T]]])r  r   r&  r   r  r\   r   c                  s6   ddd fdd}| dk	r.|| S |S dS )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r   rO  rP  c              	     sJ   t | sttdddkr0ddlm} || S t|  dS d S )NZTRITON_INTERPRETr+  r   r   )InterpretedFunction)r-  r   r  r\   r   r&  )r  r]   r8  getenvinterpreterrS  r?   )r  rS  r  r   r&  r\   r   r-  r2   r3   	decoratora  s    zjit.<locals>.decoratorNr2   )r  r-  r   r&  r   r  r\   rW  r2   rV  r3   rQ  E  s    c                   @  s0   e Zd ZdZedd Zdd Zedd ZdS )	r>  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nr   Ztorch)r1   rJ   rR   r>  rn   r2   r2   r3   r?    s    zMockTensor.wrap_dtypec                 C  s
   || _ d S r4   r   )r.   r   r2   r2   r3   r$     s    zMockTensor.__init__c                   C  s   dS )Nr   r2   r2   r2   r2   r3   r     s    zMockTensor.data_ptrN)rJ   rR   r   r   rN  r?  r$   r   r2   r2   r2   r3   r>  }  s   
r>  c                   @  sR   e Zd Zdd Zdd Zdd Zddd	d
Zdd Zdd Zdd Z	dd Z
dS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S r4   )r   basedatar  shape)r.   rY  r   r2   r2   r3   r$     s
    zTensorWrapper.__init__c                 C  s
   | j  S r4   )rY  r   r6   r2   r2   r3   r     s    zTensorWrapper.data_ptrc                 C  s   | j |S r4   )rY  stride)r.   r   r2   r2   r3   r\    s    zTensorWrapper.striderg   r   c                 C  s   d| j  d| j dS )NzTensorWrapper[r   r  )r   rY  r6   r2   r2   r3   __str__  s    zTensorWrapper.__str__c                 C  s
   | j  S r4   )rY  element_sizer6   r2   r2   r3   r^    s    zTensorWrapper.element_sizec                 C  s   t | j | jS r4   )rX  rY  cpur   r6   r2   r2   r3   r_    s    zTensorWrapper.cpuc                 C  s   | j |j  d S r4   )rY  copy_)r.   otherr2   r2   r3   r`    s    zTensorWrapper.copy_c                 C  s   t | j|| jS r4   )rX  rY  tor   )r.   r  r2   r2   r3   rb    s    zTensorWrapper.toN)rJ   rR   r   r$   r   r\  r]  r^  r_  r`  rb  r2   r2   r2   r3   rX    s   rX  c                 C  sV   t | tr*|| jjkr| jS t| j|S n(t| dr>t| |S tdt|  dd S )Nr   zCannot reinterpret a r   )r   rX  rY  r   r   r   r8   )Ztensorr   r2   r2   r3   reinterpret  s    


rc  )F)N)4
__future__r   r   r:   r&   rS   r^   r8  r3  r0  collectionsr   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   r   Zruntime.driverr   typesr   rJ   r   rN   r   NodeVisitorr   r   r   r   r   r   r   r   r   r   r   r$  r   r?   rQ  r>  rX  rc  r2   r2   r2   r3   <module>   s   0 Z.
B  o 8