U
    h                     @   s  d dl Z d dlmZ d dlZd dlZd dlZd dlmZ	 d dl
mZ ddlmZ d dlmZ ddlmZ dd	lmZ G d
d dZG dd dZeddG dd dZdd Zdd Zdd Zdd Zdd ZejeejgdZejeej gdZ!ejeej"gdZ#G dd dZ$G dd  d Z%d!d" Z&d#d$ Z'd%d& Z(G d'd( d(Z)G d)d* d*e)Z*G d+d, d,e)Z+d-d. Z,d/d0 Z-d1d2 Z.d3d4 Z/e% Z0d5d6d7d8d9d:gZ1G d;d< d<Z2G d=d> d>Z3dS )?    N)Tuple)	dataclass   )InterpreterError)partial   )interpreter)irc                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )TensorHandlec                 C   s   || _ || _i | _dS )a  
            data: numpy array
            dtype: triton type, either pointer_type or scalar_type.
            we don't store block_type here because the shape information is already availale in the data field
            attr: a dictionary of attributes
        N)datadtypeattr)selfr   r    r   L/var/www/html/venv/lib/python3.8/site-packages/triton/runtime/interpreter.py__init__   s    zTensorHandle.__init__c                 C   s   t | j S N)boolr   allr   r   r   r   __bool__   s    zTensorHandle.__bool__c                 C   s   | j }t|dr|j}q|S )N
element_ty)r   hasattrr   )r   r   r   r   r   get_element_ty    s    
zTensorHandle.get_element_tyc                 C   s   t | j | jS r   )r
   r   copyr   r   r   r   r   clone&   s    zTensorHandle.clonec                 C   s   || j |< d S r   )r   )r   keyvaluer   r   r   set_attr)   s    zTensorHandle.set_attrN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r
      s
   r
   c                   @   s   e Zd Zdd Zdd ZdS )BlockPointerHandlec                 C   s(   || _ || _|| _|| _|| _|| _d S r   )baseshapestridesoffsetstensor_shapeorder)r   r#   r$   r%   r&   r'   r(   r   r   r   r   /   s    zBlockPointerHandle.__init__c           
      C   s   | j  }|jd }| j}t| j j| j}tj| jtd}t	t
|D ]~}dgt
| }|| ||< | j| jt||  |}	|||	 | j| j tj }||krHt||	| j| jk }qHt|| j jj}||fS )N   r   r   )r#   r   primitive_bitwidthr'   npbroadcast_tor   Zonesr   rangelenr&   arangereshaper%   astypeuint64logical_andr$   r
   r   scalar)
r   boundary_checkdtype_ttZn_bytesr'   ptrsmasksdimZ
bcast_dimsoffr   r   r   materialize_pointers7   s    

  z'BlockPointerHandle.materialize_pointersN)r   r    r!   r   r<   r   r   r   r   r"   -   s   r"   T)frozenc                   @   sr   e Zd ZU dZeed< dZeed< dZe	ed< dZ
eed< dZeed< d	Ze	ed
< dZee	 ed< dZeed< dS )InterpreterOptionsNextern_libsFdebugarchTallow_fp8e4nvallow_fp8e4b15tf32default_dot_input_precision)rD   Ztf32x3Zieeeallowed_dot_input_precisionsr   max_num_imprecise_acc_default)r   r    r!   r?   dict__annotations__r@   r   rA   strrB   rC   rE   rF   r   rG   intr   r   r   r   r>   H   s   
r>   c                 C   sD   | t jkrt jS | t jkr t jS | t jkr0t jS | t jkr@t jS | S r   )	r,   uint8int8uint16int16uint32int32r3   int64r*   r   r   r   _get_signed_np_dtypeT   s    



rS   c              &   C   sR  t | tjrttjS tjtttjttjtj	ttj	tj
ttj
tjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttji}t | tjrJt | jtjr@ttjS || j S ||  S r   )
isinstancetlpointer_typer,   r   r3   int1r   float16float32float64rM   rL   rO   rN   rQ   rP   rR   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )Ztt_dtypeZnp_typesr   r   r   _get_np_dtype`   sX      
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

rb   c                 C   s  t td|j }t td|j }tj|  |d}||jd ? d@ }|j|j d }|j|j d }	|d|j> d @ }
|j}|j}||j? d|> d @ tj}|dk}t	|r@tj
|tjd}t|jD ]"}|
|? d@ }|j| ||dk< q|
dk}d||  ||< || |||@ < |
| || > d|j> d @ |
|< tdt|| | d|	> d }||}||}|j|jkr|
|j|j ? d|j> d @ }|tjjkr|
d|j|j d > @ }||dk }||}n$|
||j|j > d|j> d @ }|dk}t	|r||j? d|> d @ tj}|dk}||@ }tj
|tjd}d| || |  ||< || || ? d|j||  > B ||< ||jd > ||j> B |B }|| jS )Nuintr*   r   r   )getattrr,   r+   Z
frombuffertobytesZfp_mantissa_widthZexponent_biasr2   rQ   any
zeros_liker.   maximumminimum_irZROUNDING_MODEZRTNEr1   r$   )inputZinput_dtypeZoutput_dtyperounding_modeZinput_uint_dtypeZoutput_unint_dtypeZ	input_binsignZinput_exponent_widthZoutput_exponent_widthZsignificandZ
bias_inputZbias_outputexponentZsubnormal_indexZbit_posiZ	bit_indexZzero_significand_indexZexponent_outputZsign_outputZsignificand_outputcut_offZnon_zero_exponent_indexshiftoutputr   r   r   _convert_float   sl    $


rs   c                 C   s
   t | S r   )matherfxr   r   r   _erf   s    rx   c                 C   s   t | t | d? S )N@   )rK   )abr   r   r   
_umulhi_64   s    r|   )Zotypesc                   @   s   e Zd Zedd ZdS )ExtraFunctionsc                 C   s   t || j|||S r   )rU   tensorcreate_fp_to_fphandle)rk   dst_tyZfp_downcast_rounding_builderr   r   r   _convert_custom_types   s    z$ExtraFunctions._convert_custom_typesN)r   r    r!   staticmethodr   r   r   r   r   r}      s   r}   c                   @   sb  e Zd Zejjejjejjejjejjejjejj	ejj	iZ
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejji
ZddddZdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d+d, Z,d-d. Z-d/d0 Z.d1d2 Z/d3d4 Z0d5d6 Z1d7d8 Z2d9d: Z3d;d< Z4d=d> Z5d?d@ Z6dAdB Z7dCdD Z8dEdF Z9dGdH Z:dIdJ Z;dKdL Z<dMdN Z=dOdP Z>dQdR Z?dSdT Z@dUdV ZAdWdX ZBdYdX ZCdZdX ZDd[dX ZEd\dX ZFd]dX ZGd^dX ZHd_d` ZIdadb ZJdcdd ZKdedX ZLdfdX ZMdgdX ZNdhdX ZOdidX ZPdjdX ZQdkdX ZRdldX ZSdmdX ZTdndX ZUdodX ZVdpdX ZWdqdX ZXdrdX ZYdsdX ZZdtdX Z[dudX Z\dvdX Z]dwdX Z^dxdX Z_dydX Z`dzdX Zad{dX Zbd|dX Zcd}dX Zdd~dX ZeddX ZfddX ZgddX ZhddX ZiddX ZjddX ZkddX ZlddX ZmddX ZnddX ZoddX ZpddX ZqddX ZrddX ZsddX ZtddX ZuddX ZvddX ZwddX ZxddX ZyddX ZzddX Z{dd Z|dd Z}dd Z~dd ZddX ZddX Zdd Zdd Zdd ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX Zdd Zdd ZddX Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք Zdd؄ Zddڄ Zdd܄ Zddބ Zdd Zdd Zdd Zdd ZdS )InterpreterBuilderNreturnc                 C   s$   d | _ t | _i | _tj| jd< d S )NZconvert_custom_types)rA   r>   optionsZcodegen_fnsr}   r   r   r   r   r   r      s    zInterpreterBuilder.__init__c                 C   sR   || j d k std|| j d k s,td|| j d k sBtd|||f| _d S )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   rw   yzr   r   r   set_grid_idx   s    zInterpreterBuilder.set_grid_idxc                 C   s   |||f| _ d S r   )r   )r   nxnyZnzr   r   r   set_grid_dim   s    zInterpreterBuilder.set_grid_dimc                 C   s   t jS r   )rU   rX   r   r   r   r   get_half_ty   s    zInterpreterBuilder.get_half_tyc                 C   s   t jS r   )rU   r[   r   r   r   r   get_bf16_ty  s    zInterpreterBuilder.get_bf16_tyc                 C   s   t jS r   )rU   rY   r   r   r   r   get_float_ty  s    zInterpreterBuilder.get_float_tyc                 C   s   t jS r   )rU   rZ   r   r   r   r   get_double_ty	  s    z InterpreterBuilder.get_double_tyc                 C   s   t jS r   )rU   rM   r   r   r   r   get_int8_ty  s    zInterpreterBuilder.get_int8_tyc                 C   s   t jS r   )rU   rL   r   r   r   r   get_uint8_ty  s    zInterpreterBuilder.get_uint8_tyc                 C   s   t jS r   )rU   rO   r   r   r   r   get_int16_ty  s    zInterpreterBuilder.get_int16_tyc                 C   s   t jS r   )rU   rN   r   r   r   r   get_uint16_ty  s    z InterpreterBuilder.get_uint16_tyc                 C   s   t jS r   )rU   rQ   r   r   r   r   get_int32_ty  s    zInterpreterBuilder.get_int32_tyc                 C   s   t jS r   )rU   rP   r   r   r   r   get_uint32_ty  s    z InterpreterBuilder.get_uint32_tyc                 C   s   t jS r   )rU   rR   r   r   r   r   get_int64_ty  s    zInterpreterBuilder.get_int64_tyc                 C   s   t jS r   )rU   r3   r   r   r   r   get_uint64_ty!  s    z InterpreterBuilder.get_uint64_tyc                 C   s   t jS r   )rU   r^   r   r   r   r   get_fp8e4nv_ty$  s    z!InterpreterBuilder.get_fp8e4nv_tyc                 C   s   t jS r   )rU   r`   r   r   r   r   get_fp8e4b15_ty'  s    z"InterpreterBuilder.get_fp8e4b15_tyc                 C   s   t jS r   )rU   r_   r   r   r   r   get_fp8e4b8_ty*  s    z!InterpreterBuilder.get_fp8e4b8_tyc                 C   s   t jS r   )rU   r\   r   r   r   r   get_fp8e5_ty-  s    zInterpreterBuilder.get_fp8e5_tyc                 C   s   t jS r   )rU   r]   r   r   r   r   get_fp8e5b16_ty0  s    z"InterpreterBuilder.get_fp8e5b16_tyc                 C   s   t ||S r   )rU   rV   )r   Zelt_tyZ
addr_spacer   r   r   
get_ptr_ty3  s    zInterpreterBuilder.get_ptr_tyc                 C   s   t ||S r   )rU   ra   )r   r   r$   r   r   r   get_block_ty6  s    zInterpreterBuilder.get_block_tyc                 C   s   t tj|gtjdtjS Nr*   )r
   r,   arrayZbool_rU   rW   r   r   r   r   r   get_int19  s    zInterpreterBuilder.get_int1c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rL   rU   r   r   r   r   	get_uint8<  s    zInterpreterBuilder.get_uint8c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rM   rU   r   r   r   r   get_int8?  s    zInterpreterBuilder.get_int8c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rN   rU   r   r   r   r   
get_uint16B  s    zInterpreterBuilder.get_uint16c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rO   rU   r   r   r   r   	get_int16E  s    zInterpreterBuilder.get_int16c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rP   rU   r   r   r   r   
get_uint32H  s    zInterpreterBuilder.get_uint32c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rQ   rU   r   r   r   r   	get_int32K  s    zInterpreterBuilder.get_int32c                 C   s   t tj|gtjdtjS r   )r
   r,   r   r3   rU   r   r   r   r   
get_uint64N  s    zInterpreterBuilder.get_uint64c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rR   rU   r   r   r   r   	get_int64Q  s    zInterpreterBuilder.get_int64c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rX   rU   r   r   r   r   get_fp16T  s    zInterpreterBuilder.get_fp16c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rY   rU   r   r   r   r   get_fp32W  s    zInterpreterBuilder.get_fp32c                 C   s   t tj|gtjdtjS r   )r
   r,   r   rZ   rU   r   r   r   r   get_fp64Z  s    zInterpreterBuilder.get_fp64c                 C   s   t tjdgt|d|S Nr   r*   )r
   r,   r   rb   )r   typer   r   r   get_null_value]  s    z!InterpreterBuilder.get_null_valuec                 C   s2   | j d krtdttj| j | gtjdtjS )Nzgrid_idx is Noner*   )r   r   r
   r,   r   rQ   rU   r   axisr   r   r   create_get_program_ida  s    
z(InterpreterBuilder.create_get_program_idc                 C   s    t tj| j| gtjdtjS r   )r
   r,   r   r   rQ   rU   r   r   r   r   create_get_num_programsf  s    z*InterpreterBuilder.create_get_num_programsc                 C   s0   t tj|jtdtj}d }| ||||||S r   )r
   r,   	ones_liker   r   rU   rW   create_masked_load)r   ptr_0_1is_volatilemaskotherr   r   r   create_loadj  s    zInterpreterBuilder.create_loadc                 C   s*   t tj|jtdtj}| |||d d S r   )r
   r,   r   r   r   rU   rW   create_masked_store)r   r   valr   r   r   r   r   r   create_storeo  s    zInterpreterBuilder.create_storec           
      C   sN   |  }t|}|d kr.ttj|j|d|}t|j|j|j|}	t|	|S r   )r   rb   r
   r,   rg   r   _interpreterload)
r   r8   r   r   cache_modifiereviction_policyr   r7   dtype_npretr   r   r   r   s  s    z%InterpreterBuilder.create_masked_loadc                 C   s   t |j|j|jS r   )r   storer   )r   r8   r   r   r   r   r   r   r   r   {  s    z&InterpreterBuilder.create_masked_storec                 C   sx   |j j}|j}|tjkr"|tjks6|tjkr\|tjkr\t|j||d t|}t	||jS t	|j
t||jS d S r   )r   r5   rU   r[   rY   rs   r   viewrb   r
   r2   )r   srcdst_typesrc_element_typedst_element_typer   r   r   r   	cast_impl  s    zInterpreterBuilder.cast_implc                 C   s   |  ||S r   r   r   r   r   r   r   r   <lambda>      zInterpreterBuilder.<lambda>c                 C   s   |  ||S r   r   r   r   r   r   r     r   c                 C   s   |  ||S r   r   r   r   r   r   r     r   c                 C   s   |  ||S r   r   r   r   r   r   r     r   c                 C   s   |  ||S r   r   r   r   r   r   r     r   c                 C   s   |  ||S r   r   r   r   r   r   r     r   c                 C   s   |  ||S r   r   )r   r   r   	is_signedr   r   r   r     r   c                 C   s4   |j j}|j}t|j|||t|}t||jS r   )r   r5   rs   r   r   rb   r
   )r   r   r   rl   r   r   r   r   r   r   r     s    z"InterpreterBuilder.create_fp_to_fpc                 C   s   t |jt||jS r   )r
   r   r   rb   r5   r   r   r   r   create_bitcast  s    z!InterpreterBuilder.create_bitcastc                 C   s   t ||j|j|jjS r   r
   r   r   r5   )r   lhsrhsopr   r   r   	binary_op  s    zInterpreterBuilder.binary_opc                 C   s   |  ||tjS r   r   r,   addr   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   multiplyr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   divider   r   r   r   r     r   c                 C   s   |  ||tjS r   )r   r,   	remainderr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   subtractr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||S r   create_idivr   r   r   r   r     r   c                 C   s   |  ||S r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   fmodr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   )r   r,   Z
left_shiftr   r   r   r   r     r   c                 C   s   |  ||tjS r   )r   r,   right_shiftr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   ri   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   rh   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   Z
less_equalr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   lessr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   Zgreater_equalr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   Zgreaterr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   equalr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r,   	not_equalr   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   r   r   r   r   r   r     r   c                 C   s   |  ||tjS r   )r   r,   Zbitwise_andr   r   r   r   r     r   c                 C   s   |  ||tjS r   )r   r,   Zbitwise_xorr   r   r   r   r     r   c                 C   s   |  ||tjS r   )r   r,   Z
bitwise_orr   r   r   r   r     r   c                 C   s&   t |jt|j|j |j |jjS r   )r
   r   r,   r   r   r5   r   r   r   r   r     s    zInterpreterBuilder.create_idivc                 C   sD   t |jj}t |jj}|j||_|j||_| ||tjS r   )rS   r   r   r2   r   r,   r   )r   r   r   Z	lhs_dtypeZ	rhs_dtyper   r   r   create_ashr  s
    zInterpreterBuilder.create_ashrc                 C   s   |j j}|tjks|tjkr4tt|j |j |jjS ttd|j	d d  }|j 
|}|j 
|}t|||j	d ? }t|
||jjS d S )Nrc   r)   r   )r   r   r,   rR   r3   r
   np_umulhi_u64r5   rd   itemsizer2   r   )r   r   r   r   Zcompute_dtypeZlhs_dataZrhs_dataZret_datar   r   r   create_umulhi  s    z InterpreterBuilder.create_umulhic                 C   s   t ||j|j|j|jjS r   r   )r   r   r   r   r   r   r   r   
ternary_op  s    zInterpreterBuilder.ternary_opc                 C   s   |  |||tjS r   )r   r,   Zclip)r   arglohiZpropagate_nansr   r   r   r     r   c                 C   s   |  |||tjS r   )r   r,   where)r   condr   r   r   r   r   r     r   c                 C   s   t |j|j |j |jjS r   r   r   r   r   r   
create_fma  s    zInterpreterBuilder.create_fmac                 C   s   t ||j|jjS r   r   )r   r   r   r   r   r   unary_op  s    zInterpreterBuilder.unary_opc                 C   sZ   |j }|jd }ttd|j }|j|}d|> d }||@ t|}t||j jS )Nr   rc   )	r   r+   rd   r,   r   r   rb   r
   r5   )r   r   r7   Zmask_bitwidthZnp_uint_dtyper   r   r   r   r   r   create_fabs  s    
zInterpreterBuilder.create_fabsc                 C   s   |  |tjS r   )r  r,   cosr   r   r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   expr  r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   Zexp2r  r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   absr  r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   floorr  r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   ceilr  r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   logr  r   r   r   r     r   c                 C   s   |  |tjS r   )r  r,   log2r  r   r   r   r     r   c                 C   s   |  |tjS r   r  r,   sqrtr  r   r   r   r   	  r   c                 C   s   |  |tjS r   r  r  r   r   r   r   
  r   c                 C   s   |  |tjS r   )r  r,   sinr  r   r   r   r     r   c                 C   s0   |j jtjkrt|j nt|j }t||jjS r   )r   r   r,   rY   np_erf_fp32np_erf_fp64r
   r5   )r   r   r   r   r   r   
create_erf  s    "zInterpreterBuilder.create_erfc                 C   s   t dt|j |jjS )Nr   )r
   r,   r  r   r   r5   r  r   r   r   create_rsqrt  s    zInterpreterBuilder.create_rsqrtc                 C   s   t |j||jjS r   )r
   r   r1   r   r5   )r   r   r$   Zallow_reorderr   r   r   r     r   c                 C   s   t t|j||jjS r   )r
   r,   	transposer   r   r5   )r   r   permr   r   r   create_trans  s    zInterpreterBuilder.create_transc                 C   s   |j }|j }|jjdkr"|j s8|jjdkrl|j rlt||jtjd tj}t||jtjd tj}t	tj
|||j jd|j  |jjS )Nr)   r*   )r   r   r+   Zis_floatingrs   rU   rX   r   r,   r
   matmulr5   )r   rz   r{   dZinput_precisionZmax_num_imprecise_accZa_dataZb_datar   r   r   
create_dot  s    
zInterpreterBuilder.create_dotc                 C   s   t tj||tjdtjS r   )r
   r,   r0   rQ   rU   )r   startstopr   r   r   create_make_range#  s    z$InterpreterBuilder.create_make_rangec                 C   s"   t tj|j|d|fdd tjS )Nr   )binsr.   )r
   r,   Z	histogramr   rU   rQ   )r   r   r  r   r   r   create_histogram&  s    z#InterpreterBuilder.create_histogramc                 C   s<   |  }|j}td|d }t|j||jtj  |jS )Nr   r)   )	r   r+   maxr
   r   r2   r,   r3   r   )r   r   offsetr7   Zelement_bitwidthZelement_bytewidthr   r   r   create_addptr+  s    z InterpreterBuilder.create_addptrc                 C   s   | |\}}| }	t|	}
|d kr,d }n\|tjjkrPttj|j	|
d|	}n8|tjj
krzttj|j	td|
d|	}ntd| | ||||||S )Nr*   nanzunsupported padding option )r<   r   rb   rj   ZPADDING_OPTIONZPAD_ZEROr
   r,   rg   r   ZPAD_NANZ	full_likefloatr   r   )r   r   r6   Zpadding_optionr   r   r   r8   r9   r7   r   r   r   r   r   create_tensor_pointer_load2  s    z-InterpreterBuilder.create_tensor_pointer_loadc                 C   s    | |\}}| |||||S r   )r<   r   )r   r   r   r6   r   r   r8   r9   r   r   r   create_tensor_pointer_storeA  s    z.InterpreterBuilder.create_tensor_pointer_storec                 C   s   t t|j||jjS r   )r
   r,   expand_dimsr   r   r5   )r   r   r   r   r   r   create_expand_dimsE  s    z%InterpreterBuilder.create_expand_dimsc                 C   s   t t|j||jjS r   )r
   r,   r-   r   r   r5   r   r   r$   r   r   r   create_broadcastH  s    z#InterpreterBuilder.create_broadcastc                 C   s   t |jtj|jS r   r
   r   r2   r,   r3   r5   r   r   r   r   r   r   create_int_to_ptrK  s    z$InterpreterBuilder.create_int_to_ptrc                 C   s   t |jtj|jS r   r,  r-  r   r   r   create_ptr_to_intN  s    z$InterpreterBuilder.create_ptr_to_intc                 C   s   t t|j|jg|jjS r   )r
   r,   Zconcatenater   r   r5   r   r   r   r   
create_catQ  s    zInterpreterBuilder.create_catc                 C   s    t tj|j|jgdd|jjS )Nr   )r
   r,   stackr   r   r5   r   r   r   r   create_joinT  s    zInterpreterBuilder.create_joinc                 C   s(   t |jd |jjt |jd |jjfS )N).r   ).r   r   )r   r   r   r   r   create_splitX  s    zInterpreterBuilder.create_splitc                 C   sZ   t |jtjr4ttj||jd t|jd|jj	S ttj||jt|jd|jj	S d S r   )
rT   r   rU   ra   r
   r,   fullr   rb   r5   r*  r   r   r   create_splat\  s    &zInterpreterBuilder.create_splatc                 C   sB   || j krtd| | j | }tt|j|j|j||jjS )Nunsupported semantic )ir_sem_to_interpreter_semr   r
   r   Z
atomic_casr   r   r5   )r   r   cmpr   semscoper   r   r   create_atomic_casb  s    

z$InterpreterBuilder.create_atomic_casc                 C   sf   || j krtd| || jkr0td| | j | }| j| }tt||j|j|j||jjS )Nzunsupported rmwOp r8  )	ir_rmw_op_to_interpreter_rmw_opr   r9  r
   r   Z
atomic_rmwr   r   r5   )r   ZrmwOpr   r   r   r;  r<  r   r   r   create_atomic_rmwh  s    



z$InterpreterBuilder.create_atomic_rmwc                 C   s   t dd S )Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   ZlibNameZlibPathsymbolZargListZretTypeisPurer   r   r   create_extern_elementwiseq  s    z,InterpreterBuilder.create_extern_elementwisec                 C   s   t dd S )Nz,inline_asm not supported in interpreter moder@  )r   Z	inlineAsmconstraintsvaluesr   rC  packr   r   r   create_inline_asmt  s    z$InterpreterBuilder.create_inline_asmc                 C   s   d| j d  d| j d  d| j d  d}|r<|d| 7 }|rTtjdd	d
 id |D ]}t|d|j   qX|rtjd d d S )N(r   z, r   r   ) r   c                 S   s   d| dS )N0x02xr   rv   r   r   r   r   }  r   z1InterpreterBuilder.create_print.<locals>.<lambda>)	formatter)r   r,   Zset_printoptionsprintr   )r   prefixhexrF  msgr   r   r   r   create_printw  s    *zInterpreterBuilder.create_printc                 C   s&   |s"t | d| d| d| d S )Nz in :AssertionError)r   	conditionmessagefileNamefuncNameZlineNor   r   r   create_assert  s    z InterpreterBuilder.create_assertc                 C   s   d S r   r   r   r   r   r   create_barrier  s    z!InterpreterBuilder.create_barrierc                 C   s    dd |D }t ||||||S )Nc                 S   s   g | ]}|  qS r   r   .0r"  r   r   r   
<listcomp>  s     z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>)r"   )r   r#   r$   r%   r&   r'   r(   new_offsetsr   r   r   create_make_block_ptr  s    z(InterpreterBuilder.create_make_block_ptrc                 C   sv   t |jt |krtddd |jD }t|j|j|j||j|j}t	t |D ]}|j|  j
|| j
7  _
qR|S )Nz len(ptr.offsets) != len(offsets)c                 S   s   g | ]}|  qS r   r]  r^  r   r   r   r`    s     z5InterpreterBuilder.create_advance.<locals>.<listcomp>)r/   r&   r   r"   r#   r$   r%   r'   r(   r.   r   )r   r   r&   ra  r   ro   r   r   r   create_advance  s    z!InterpreterBuilder.create_advancec                 C   s<   t |}d|jkr*ttjdd|d|jS td| d S )NrK   r   r1  r*   zunsupported type )rb   namer
   r,   r6  r5   	TypeError)r   r   Znp_typer   r   r   get_all_ones_value  s    
z%InterpreterBuilder.get_all_ones_value)r   r    r!   rj   ZMEM_SEMANTICZACQUIREr   ZRELEASEZRELAXEDZACQUIRE_RELEASEr9  Z	ATOMIC_OPZADDZRMW_OPZFADDZMINZUMINMAXZUMAXANDORZXORZXCHGr>  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zcreate_si_to_fpZcreate_ui_to_fpZcreate_fp_to_siZcreate_fp_to_uiZcreate_fp_extZcreate_fp_truncZcreate_int_castr   r   r   Zcreate_faddZcreate_fmulZcreate_fdivZcreate_fremZcreate_fsubZ
create_mulZcreate_precise_divfZcreate_sdivZcreate_udivZcreate_sremZcreate_uremZ
create_addZ
create_subZ
create_shlZcreate_lshrZcreate_minsiZcreate_minuiZcreate_minimumfZcreate_minnumfZcreate_maxsiZcreate_maxuiZcreate_maximumfZcreate_maxnumfZcreate_icmpSLEZcreate_icmpSLTZcreate_icmpSGEZcreate_icmpSGTZcreate_icmpULEZcreate_icmpULTZcreate_icmpUGEZcreate_icmpUGTZcreate_icmpEQZcreate_icmpNEZcreate_fcmpOLTZcreate_fcmpOGTZcreate_fcmpOLEZcreate_fcmpOGEZcreate_fcmpOEQZcreate_fcmpONEZcreate_fcmpULTZcreate_fcmpUGTZcreate_fcmpULEZcreate_fcmpUGEZcreate_fcmpUEQZcreate_fcmpUNEZ
create_andZ
create_xorZ	create_orr   r   r   r   Zcreate_clampfZcreate_selectr  r  r  Z
create_cosZ
create_expZcreate_exp2Zcreate_iabsZcreate_floorZcreate_ceilZ
create_logZcreate_log2Zcreate_precise_sqrtZcreate_sqrtZ
create_sinr  r  Zcreate_reshaper  r  r  r   r#  r&  r'  r)  r+  r.  r/  r0  r4  r5  r7  r=  r?  rD  rH  rS  r[  r\  rb  rc  rf  r   r   r   r   r      sd                	

		
r   c                    s"   |d fdd
}t | || d S )N)memberc                    s    | |dd |  D d iS )Nc                 S   s   i | ]\}}|d kr||qS )r   r   r_  kvr   r   r   
<dictcomp>  s    z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>r   )items)rj  argskwargsbuilderr   r   r     s   z_patch_attr.<locals>.<lambda>)setattr)objrd  rj  rs  
new_memberr   rr  r   _patch_attr  s    rw  c                 C   s2   t | D ]"\}}tj|r
t| ||| q
d S r   )inspect
getmembersrU   core
is_builtinrw  )pkgrs  rd  rj  r   r   r   _patch_builtin  s    r}  c                    sJ   dd  dd }dd | _  fdd| _dd | _d	d | _t|| _d S )
Nc                 S   s   | j j}|jdkrt|S dS )Nr   T)r   r   sizer   )r   r   r   r   r   	_get_bool  s    z%_patch_lang_tensor.<locals>._get_boolc                 S   s&   t jtt| jj| jj| jj	S r   )
rU   rz  r~   r
   r,   r  r   r   r   r5   r   r   r   r   _get_transpose  s    z*_patch_lang_tensor.<locals>._get_transposec                 S   s   t | jjS r   )rK   r   r   r   r   r   r   r     r   z$_patch_lang_tensor.<locals>.<lambda>c                    s    | S r   r   r   r  r   r   r     r   c                 S   s   t | jjS r   )reprr   r   r   r   r   r   r     r   c                 S   s   t | jjS r   )rJ   r   r   r   r   r   r   r     r   )	__index__r   __repr____str__propertyT)r~   r  r   r  r   _patch_lang_tensor  s    


r  c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )ReduceScanOpIneterfacec                 C   s   || _ || _d S r   )r   
combine_fn)r   r   r  r   r   r   r     s    zReduceScanOpIneterface.__init__c                 C   s,   |d k	r(|t |kr(td| d| d S )Nzaxis z out of bounds for shape )r/   r   )r   r$   r   r   r   r   
check_axis  s    z!ReduceScanOpIneterface.check_axisc                 C   s>   |D ]4}t |tjjs(tdt| | |j| j qd S )Nzinput must be a tensor, got )	rT   rU   rz  r~   r   r   r  r$   r   )r   rk   r   r   r   r   check_tensor  s    z#ReduceScanOpIneterface.check_tensorc                 C   sN   t |dr |jr t||j}ntj|gt|d}|}tjt	||j
|S )Nr$   r*   )r   r$   rU   ra   r,   r   rb   rz  r~   r
   r5   )r   r   r   Zret_typer   r   r   	to_tensor  s
    z ReduceScanOpIneterface.to_tensorc                 C   s$   t |ts|f}| | | |S r   )rT   tupler  
apply_implr   rk   r   r   r   apply  s    

zReduceScanOpIneterface.applyc                 C   s   t dd S )Nzapply_impl not implementedr@  r  r   r   r   r    s    z!ReduceScanOpIneterface.apply_implN)	r   r    r!   r   r  r  r  r  r  r   r   r   r   r    s   r  c                       sF   e Zd Z fddZdd Zdd Zddd	Zd
d Zdd Z  Z	S )	ReduceOpsc                    s   t  || || _d S r   )superr   	keep_dims)r   r   r  r  	__class__r   r   r     s    zReduceOps.__init__c                 C   sN   g }|D ]8}|d k	r | | qd}| | |jj |j qt||fS )Nr   )appendr  r   r   flattenr   r  )r   rk   r   r   r   r   r   r   unravel  s    zReduceOps.unravelc                    s@  j } j \ }g }g } d jjj}|d| ||d d   } D ],}||jj |tj||jjjd qNt	|d j
D ]}	t|	|d| |d d   t fddt|D }
| dkrt	t|D ]}|
| jj || < qqt fddt|D }jj||
 }t|tsP|fn|}t	t|D ]:}t|| tjjr|| jj n|| || < q`qg }t|D ]v\}	}jr|d k	rt||}n t	t|D ]}t|d}qn|d kr| }|| |	 j qt|dkr8|d S t|S )Nr   r   r*   c                 3   s(   | ] \}} |  | jV  qd S r   r  r   r_  iir  )rk   input_indexr   r   r   	<genexpr>  s     z+ReduceOps.generic_reduce.<locals>.<genexpr>c                 3   s(   | ] \}} |  | jV  qd S r   r  r_  Zoio)rk   output_indexr   r   r   r  	  s     )r   r  r   r   r$   r  r,   zerosr   r.   r~  unravel_indexr  	enumerater/   itemr  fnrT   rU   rz  r~   r  r(  r  )r   rk   Zoriginal_axisr   
input_dataoutput_dataZinput_shapeZoutput_shaper   ro   Zinput_tuplej	acc_tuplecombine_fn_retr   r   _r   )rk   r  r  r   r   generic_reduce  sJ     

zReduceOps.generic_reduceNc                 C   s   t |tr|d n|}d }d }|rB| ||jj| j| jd|j}|rf| ||jj| j| jdtj	}|d k	r~|d k	r~||fS |d k	r|S |d k	r|S t
dd S )Nr   r   Zkeepdimsz-val_reduce_op and idx_reduce_op are both None)rT   r  r  r   r   r   r  r   rU   rQ   r   )r   rk   val_reduce_opidx_reduce_opr   idxr   r   r   min_max  s      zReduceOps.min_maxc                 C   s"   |  tj|jj| j| jd|jS )Nr  )r  r,   sumr   r   r   r  r   r  r   r   r   r  1  s    zReduceOps.sumc                 C   s   | j tjjkr&| j|d tjtjdS | j tjjkrL| j|d tj	tj
dS | j tjjkrp| j|d tj	d dS | j tjjkr| j|d tjd dS | j tjjkr| |d S | |S d S )Nr   )r  r  )r  rU   standardZ_argmin_combine_tie_break_leftr  r,   minZargminZ_argmax_combine_tie_break_leftr!  ZargmaxZ_elementwise_maxZ_elementwise_min_sum_combiner  r  r  r   r   r   r  4  s    zReduceOps.apply_impl)N)
r   r    r!   r   r  r  r  r  r  __classcell__r   r   r  r   r    s   
+
r  c                       s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )ScanOpsc                    s   t  || || _d S r   )r  r   reverse)r   r   r  r  r  r   r   r   F  s    zScanOps.__init__c                 C   s"   | j tj|jj| jd|jdgS Nr2  r*   )r  r,   cumsumr   r   r   r   r  r   r   r   r  J  s    zScanOps.cumsumc                 C   s"   | j tj|jj| jd|jdgS r  )r  r,   cumprodr   r   r   r   r  r   r   r   r  M  s    zScanOps.cumprodc                    s  g }g }d j jj}D ],}||j j |tj||j jjd qt|d jD ]}t	|| t
 fddt|D } j dkrtt|D ]}|| j j ||  < qqVt
 fddtt D t
fddt|D }	jj|	| }
t|
t
s |
fn|
}	tt|D ]:}t|	| tjjrX|	| j j n|	| ||  < q0qVg }t|D ]"\}}||| j qz|S )Nr   r*   c                 3   s(   | ] \}} |  | jV  qd S r   r  r  )indexrk   r   r   r   r  [  s     z'ScanOps.generic_scan.<locals>.<genexpr>c                 3   s,   | ]$}|j kr | d  n | V  qdS )r   Nr2  )r_  ro   )r  r   r   r   r  a  s     c                 3   s(   | ] \}} |  | jV  qd S r   r  r  )rk   
prev_indexr   r   r   r  b  s     )r   r   r$   r  r,   r  r   r.   r~  r  r  r  r   r/   r  r  r  rT   rU   rz  r~   r  )r   rk   r  r  r$   r   ro   r   r  r  r  r   r   )r  rk   r  r   r   generic_scanP  s6      zScanOps.generic_scanc              	   C   s   g }| j r:|D ](}|| tj|jj| jd|j qn|}| j	t
jjkr\| |d }n(| j	t
jjkrz| |d }n
| |}| j r|D ]}tj|jj| jd|j_qt|dkr|d pt|S )Nr2  r   r   )r  r  r  r,   flipr   r   r   r   r  rU   r  r  r  Z_prod_combiner  r  r/   r  )r   rk   Z	new_inputr   r   r   r   r   r  n  s    (
zScanOps.apply_impl)	r   r    r!   r   r  r  r  r  r  r   r   r  r   r  D  s
   r  c                  C   s4   ddd} ddd}| t _|t _| t j_|t j_d S )NFc                 [   s   t |||| S r   )r  r  )rk   r   r  r  rq  r   r   r   _new_reduce  s    z'_patch_reduce_scan.<locals>._new_reducec                 [   s   t |||| S r   )r  r  )rk   r   r  r  rq  r   r   r   	_new_scan  s    z%_patch_reduce_scan.<locals>._new_scan)F)F)rU   reduceZassociative_scanrz  )r  r  r   r   r   _patch_reduce_scan  s    

r  c                 C   sx   dd }ddd}ddd}dd	 }|| _ || _|| _t| _|| j_t|d
d| _t|dd| _	t|dd| _
t  d S )Nc                 S   sP  | j dkr| S | j dkr$| S | j dkr6| S | j dkrH| S | j dkrZ| S | j dkrl| S | j dkr~| S | j dkr| S | j d	kr|	 S | j d
kr|
 S | j dkr| S | j dkr| S | j dk r| S | j dkr | S | j dkr| S | j dkr(| S | j dkr<| S td|  dd S )NvoidrW   rM   rL   rO   rN   rQ   rP   rR   r3   Zfp8e5Zfp8e4nvZfp8e4b15Zfp16Zbf16Zfp32Zfp64zfail to convert z to ir type)rd  Zget_void_tyZget_int1_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   rs  r   r   r   
_new_to_ir  sF    











z$_patch_lang_core.<locals>._new_to_irc                 [   s6   |d krd}|d kr d|  }}n
| | }}t |||S )Nr   r   )r.   )Zarg1Zarg2steprq  r  endr   r   r   
_new_range  s    
z$_patch_lang_core.<locals>._new_range c                 S   s   | st |d S r   rU  )r  rR  r   r   r   _new_static_assert  s    z,_patch_lang_core.<locals>._new_static_assertc                 S   sn   t | tjs| S t |ttfs$|gn|}dd |D }t|tdt| jkr\td| | j	
|| | S )Nc                 S   s"   g | ]}t |tjr|jn|qS r   )rT   rU   	constexprr   r_  rm  r   r   r   r`    s     z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>r   z$len(values) != len(input.shape) for )rT   rU   r~   listr  r/   r!  r$   r   r   r   )rk   rF  rd  r   r   r   	_set_attr  s    z#_patch_lang_core.<locals>._set_attrztt.divisiblityrd  ztt.contiguityztt.constancy)NN)r  )r.   Zstatic_rangeZstatic_assertrO  Zstatic_printr   Zto_irr   Zmultiple_ofZmax_contiguousZmax_constancyr  )langr  r  r  r  r   r   r   _patch_lang_core  s    (
	
r  c                 C   s   dd | j  D }t|dks(tdt|d t t|d jt |d tkrbt|d jt t	|d j t
|d  d S )Nc                 S   s"   g | ]\}}|t t jfkr|qS r   )rU   rz  )r_  r  r   r   r   r   r`    s      z_patch_lang.<locals>.<listcomp>r   z:triton.language must be visible from within jit'd functionr   )__globals__ro  r/   rV  r}  interpreter_builderr~   rU   rt   r  r  )r  r  r   r   r   _patch_lang  s    r  c                 C   s<  t | trttjjjtjjj	| }t
j}d|   krFdk rRn nt
j}nnd|   krfdk rrn nt
j}nNd|   krdk rn nt
j}n.d|   krdk rn nt
j}ntd|  tt
j| g|d|}t||S t| d	r8ttjjjtjjj	| }tt
j|  gt
jd|}t||S | S )
Ni   l        l        l         l            l            zUnsupported integer value r*   data_ptr)rT   rK   rU   Z	str_to_tytritonruntimejitZJITFunctionZ_type_ofZ_key_ofr,   rQ   rP   rR   r3   r   r
   r   r~   r   r  )r   tyr   r   r   r   r   _implicit_cvt  s&    
""r  Z	num_warpsZ
num_stagesZnum_ctasZenable_fp_fusiongridZmaxnregc                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
GridExecutorc                    sN   ddl m || _|| _|| _fdd|j D   fdd|D | _d S )Nr   _normalize_tyc                    s   i | ]\}}| |qS r   r   )r_  rd  r  r  r   r   rn    s      z)GridExecutor.__init__.<locals>.<dictcomp>c                    s   g | ]}  |d kr|qS )r  )get)r_  rd  )rI   r   r   r`    s      z)GridExecutor.__init__.<locals>.<listcomp>)r  r  r  	arg_namesr  rI   ro  
constexprs)r   r  r  r  r   )rI   r  r   r     s    zGridExecutor.__init__c                 C   sp   g }|D ](}t |dr&||  q|| qi }| D ](\}}t |dr^| ||< q>|||< q>||fS Nr  )r   r  cpuro  )r   args_devrq  args_hstr   
kwargs_hstr   r   r   r   r   _init_args_hst  s    


zGridExecutor._init_args_hstc           
      C   sr   t ||D ](\}}t|dr
|j||jj q
| D ]0\}}|| }	t|dr<|j|	|jj q<d S r  )zipr   r   Zcopy_toZdevicero  )
r   r  r  rq  r  Zarg_devZarg_hstr   Z	kwarg_devZ	kwarg_hstr   r   r   _restore_args_dev$  s    

zGridExecutor._restore_args_devc              
      sN  dd |  D }|ddr"d S  ||\}}t j tj jf||} fdd|  D }t jrz |n j}t	|dkst
d|ddt	|   }tj|  zTt|d	 D ]B}t|d
 D ]0}t|d D ]}	t|||	  jf | qqqW n2 tk
r8 }
 ztt|
|
W 5 d }
~
X Y nX  |||| d S )Nc                 S   s   i | ]\}}|t kr||qS r   )RESERVED_KWSrk  r   r   r   rn  1  s       z)GridExecutor.__call__.<locals>.<dictcomp>ZwarmupFc                    s(   i | ] \}}|| j kr|nt|qS r   )r  r  )r_  rd  r   r   r   r   rn  ;  s         z#grid must have at most 3 dimensions)r   r   r   r   )ro  popr  r  r  rx  getcallargscallabler  r/   rV  r  r   r.   r   	Exceptionr   r  r  )r   r  rq  r  r  rp  r  rw   r   r   er   r   r   __call__/  s(    

 zGridExecutor.__call__N)r   r    r!   r   r  r  r  r   r   r   r   r  	  s   	r  c                   @   s6   e Zd ZddddZedd Z dd Zd	d
 ZdS )InterpretedFunctionNr   c                    s<   | _  fdd}| _t|}dd |j D  _d S )Nc                     s   |d }t  j j|| |S )Nr  r  r  r  )rp  rq  r  r   r   r   runR  s    z)InterpretedFunction.__init__.<locals>.runc                 S   s   g | ]
}|j qS r   r  r  r   r   r   r`  X  s     z0InterpretedFunction.__init__.<locals>.<listcomp>)r  r  rx  	signature
parametersrF  r  )r   r  r  r  r   r   r   r   O  s
    
zInterpretedFunction.__init__c                 C   s   | j jS r   )r  r   r   r   r   r   r   Z  s    zInterpretedFunction.__name__c                 C   s   t | j| j|S r   r  )r   r  r   r   r   __getitem__^  s    zInterpretedFunction.__getitem__c              
   O   sN   t | j z| j||W S  tk
rH } ztt||W 5 d }~X Y nX d S r   )r  r  r  r   r  )r   rp  rq  r  r   r   r   r  a  s
    
zInterpretedFunction.__call__)r   r    r!   r   r  r  r  r   r   r   r   r  M  s
   
r  )4rx  typingr   rt   numpyr,   r  Ztriton.languagelanguagerU   dataclassesr   errorsr   	functoolsr   Z_C.libtritonr   r   r	   rj   r
   r"   r>   rS   rb   rs   rx   r|   Z	vectorizerY   r  rZ   r  r3   r   r}   r   rw  r}  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   sR    @   N"`>ND