U
    h_.                  	   @   sZ  d dl mZ d dlmZ d dlmZmZ d dlmZ dd Z	G dd de
ZeG d	d
 d
ZG dd dZdd Zdd Zeee edddZeedddZeedddZeee edddZeedddZeeedddZeeedd d!Zeedd"d#Zeedd$d%Zd&Zed'krVd d(lmZ eed)Zejd*d+d,d- ejd.d/ed0d1 ejd2ed3d4d5 e  Z!e Zg Z"e!j#D ],Z$ee$Z%e%& Z'e"(e%j) e*e' qd6d7 ej+, D Z-d8d7 ej+, D Z.e.d  d  Z/ee/Z0ee/Z1e!j23d94d:BZ5d;Z2e2d<6e-7 Z2e2d<7 Z2e2e07 Z2e2d<7 Z2e2e17 Z2e57e2 W 5 Q R X d=d7 ej+, D Z8d>d7 ej+9 D Z:ee:e/Z;ee/Z<ee:e/Z=ee/Z>ee/Z?e!j23d?4d:Z5d3Z2e2d;7 Z2e2d@7 Z2e2dA7 Z2e2d<7 Z2e2d<6e87 Z2e2d<7 Z2e2e;7 Z2e2d<7 Z2e2e>7 Z2e2d<7 Z2e2e<7 Z2e2d<7 Z2e2e=7 Z2e2d<7 Z2e2e?7 Z2e57e2 W 5 Q R X dBS )C    )defaultdict)Path)SequenceUnion)	dataclassc                 C   s   | d k	S N )xr   r   C/var/www/html/venv/lib/python3.8/site-packages/triton/tools/link.py_exists   s    r   c                   @   s   e Zd ZdS )LinkerErrorN)__name__
__module____qualname__r   r   r   r
   r      s   r   c                   @   sb   e Zd ZU eed< ee ed< ee ed< eeedf  ed< eed< eed< eed< eed	< dS )
KernelLinkerMetaorig_kernel_name	arg_names
arg_ctypesNsizessig_hashtriton_suffixsuffix	num_specs)r   r   r   str__annotations__r   r   intr   r   r   r
   r      s   
r   c                   @   sd   e Zd ZddddZedddZedd	d
ZedddZeedddZee	dddZ
dS )HeaderParserN)returnc                 C   sF   dd l }|d| _|d| _|d| _|d| _tt| _d S )Nr   z'//[\s]*tt-linker:[\s]*([\w]+):(.+):(.+)z^([\w]+)_([\w]+)_([\w]+)$z[\s]*(\w+)\s(\w+)[,]?z[c,d])	recompilelinker_directiveskernel_namec_sig
arg_suffixr   listkernels)selfr   r   r   r
   __init__   s    zHeaderParser.__init__)headerc                 C   s   |  D ]}|dr| j|}t|r|d|d|d  }}}| |\}}}	| |\}
}| |	|\}}| 	d
||gt|||
|||	|	|d qd S )Nz//         _)r   r   r   r   r   r   r   r   )
splitlines
startswithr    matchr   group_match_name_match_c_sig_match_suffix_add_kerneljoinr   )r&   r(   lnmker_namer"   Z	algo_infonamer   r   Zc_typesr   r   r   r   r   r
   extract_linker_meta-   s*    
"z HeaderParser.extract_linker_meta)r8   c                 C   sR   | j |}t|r@|d|d|d  }}}|||fS t| dd S )Nr)   r*   r+   z is not a valid kernel name)r!   r/   r   r0   r   )r&   r8   r7   r9   r   r   r   r   r
   r1   D   s
    "
zHeaderParser._match_name)r"   c                 C   sZ   | j |}t|rHg g  }}|D ]\}}|| || q"||fS t| dd S )Nz" is not a valid argument signature)r"   findalllenappendr   )r&   r"   r7   Ztysargstyarg_namer   r   r
   r2   K   s    

zHeaderParser._match_c_sig)r   r"   c           	      C   s   | d}ddd}d}g }tt|D ]}|t|}|dkrPt| d|tt|7 }| j||r|d7 }|d g|t|   |	|||   |d7 }|t|d k r||d  }q(|d gt|t|   q(||fS )N,r)      )cdr   z is not a valid kernel suffix)
splitranger<   findr   r   r#   r/   extendr=   )	r&   r   r"   r>   Zs2ir   r   iposr   r   r
   r3   V   s$    

zHeaderParser._match_suffix)r9   kerc              
   C   sv   || j krb| j | d }t|j|jD ]:\}}||kr&td| dd|j dd|j q&| j | | d S )NrE   z Mismatched signature for kernel z: 
	existing sig is: rA   z
	current is: )r%   zipr   r   r5   r=   )r&   r9   rL   lastcurZnew_r   r   r
   r4   m   s    
$zHeaderParser._add_kernel)r   r   r   r'   r   r:   r1   r2   r3   r   r4   r   r   r   r
   r      s   r   c                 C   s   d dd t| j| jD S )N, c                 S   s   g | ]\}}| d | qS  r   .0r?   argr   r   r
   
<listcomp>{   s     z0gen_signature_with_full_args.<locals>.<listcomp>)r5   rM   r   r   r7   r   r   r
   gen_signature_with_full_argsz   s    rX   c                 C   sN   dd t | j| jD }dd t | j| jD }ddd t ||D }|S )Nc                 S   s   g | ]\}}|d kr|qS r)   r   )rT   r?   hintr   r   r
   rV      s      z!gen_signature.<locals>.<listcomp>c                 S   s   g | ]\}}|d kr|qS rY   r   rT   rU   rZ   r   r   r
   rV      s      rP   c                 S   s   g | ]\}}| d | qS rQ   r   rS   r   r   r
   rV      s     )rM   r   r   r   r5   )r7   Z	arg_typesr   sigr   r   r
   gen_signature~   s    r]   )r9   metasr   c              	   C   s&   d|  dt |d  d|  d|  d	S )N

CUresult (CUstream stream, rE   z);
void load_();
void unload_();
    )rX   )r9   r^   r   r   r
   make_algo_decls   s    
rc   )metar   c                 C   s:   d| j  dt|  d| j  dt|  d| j  d| j  dS )Nr_   _default(CUstream stream, z);
CUresult r`   z, int algo_id);
void load_ra   rb   )r   rX   )rd   r   r   r
   make_global_decl   s    rf   c                 C   sD   d| j  dt|  d}|d| j  dd| j d7 }|d7 }|S )	N	CUresult re   z){
	  return 	(stream, rP   z, 0);
}
r   rX   r5   r   rd   srcr   r   r
   make_default_algo_kernel   s     rn   c                    s  d|  d}t |dd dD ]0}|d|j d|j d|j dt| d		7 }q|d7 }|d|  dt|d
  d7 }|d7 }t |dd dD ]}dd  d fddt|j|j	D }|t
|j	rd| dnd7 }dd t|j|j	D }|d|j d|j d|j dd| d		7 }q|d7 }|d7 }|d7 }dD ]}|d| d|  d7 }t |dd dD ].}|d| d|j d|j d|j d	7 }qf|d| d|  d 7 }|d7 }t |d!d dD ].}|d"| d|j d|j d|j d	7 }q|d7 }q<|S )#Nz// launcher for: 
c                 S   s   | j  S r   r   rW   r   r   r
   <lambda>       z.make_kernel_hints_dispatcher.<locals>.<lambda>)keyrg   r,   r`   );
rE   z){c                 S   s   | j  S r   rp   rW   r   r   r
   rq      rr   c                 S   s8   |dkrd|  d| dS |dkr4d|  d| dS d S )NrB   (z % z == 0)r)   z == )r   )valrZ   r   r   r
   rq      s
    z && c                    s"   g | ]\}}|d k	r ||qS r   r   )rT   rw   rZ   Zcond_fnr   r
   rV      s   z0make_kernel_hints_dispatcher.<locals>.<listcomp>z  if (z)
zif (1)
c                 S   s   g | ]\}}|d kr|qS rY   r   r[   r   r   r
   rV      s      z    return ri   rP   z#  return CUDA_ERROR_INVALID_VALUE;
rj   loadZunloadz
// z for: c                 S   s   | j  S r   rp   rW   r   r   r
   rq      rr   void ();
z() {c                 S   s   | j  S r   rp   rW   r   r   r
   rq      rr     )sortedr   r   r   r]   rX   r5   rM   r   r   any)r9   r^   rm   rd   Zcondsr   moder   rx   r
   make_kernel_hints_dispatcher   s8    .0,,r   c                 C   sV   d| j  dt|  d}|d| j  d7 }|d| j  dd| j d	7 }|d
7 }|S )Nrg   r`   z, int algo_id){
z   assert (algo_id < (int)sizeof(z_kernels));
rh   z_kernels[algo_id](stream, rP   rt   rj   rk   rl   r   r   r
   !make_kernel_meta_const_dispatcher   s
     r   )namesrd   r   c                 C   sH   dt | d}|d|j d7 }| D ]}|d| d7 }q&|d7 }|S )Nz3typedef CUresult (*kernel_func_t)(CUstream stream, rt   zkernel_func_t z_kernels[] = {
r}   z,
z};
)rX   r   )r   rd   rm   r9   r   r   r
   make_func_pointers   s    r   c                 C   sR   d}dD ]D}|d| d|j  d7 }| D ]}|d| d| d7 }q(|d7 }q|S )	N ry   r{   r,   z(void){
r}   r|   z}

r   )r   rd   rm   r   r9   r   r   r
   make_kernel_load_def   s    
r   c                 C   s   d| j  d}|S )Nint z_get_num_algos(void);r   rl   r   r   r
   make_get_num_algos_decl   s    r   c                 C   s4   d| j  d}|d| j  d| j  d7 }|d7 }|S )Nr   z_get_num_algos(void){
z  return (int)(sizeof(z_kernels) / sizeof(z_kernels[0]));
rj   r   rl   r   r   r
   make_get_num_algos_def   s    r   a0  
Triton ahead-of-time linker:

This program takes in header files generated by compile.py, and generates a
single entry-point responsible for dispatching the user's input to the right
kernel given the specializations that were compiled.

Example usage:
python link.py /path/to/headers/*.h -o kernel_name
__main__)ArgumentParser)descriptionheaders+z_Paths to header files to link. Must include linker directive annotations (autogenerated by ttc))nargshelpz--outz-ozOut filename)typer   z--prefixr   z(String to prefix kernel dispatcher names)r   defaultr   c                 C   s   g | ]\}}t ||qS r   )rc   rT   r9   rd   r   r   r
   rV     s     rV   c                 C   s   g | ]\}}|qS r   r   r   r   r   r
   rV     s     z.hwz#include <cuda.h>
ro   c                 C   s   g | ]\}}t ||qS r   )r   r   r   r   r
   rV   *  s     c                 C   s   g | ]}|qS r   r   )rT   r9   r   r   r
   rV   +  s     z.cz#include <stdint.h>
z#include <assert.h>
N)@collectionsr   pathlibr   typingr   r   dataclassesr   r   	Exceptionr   r   r   rX   r]   r   rc   rf   rn   r   r   r   r   r   r   descr   argparser   parseradd_argument
parse_argsr>   Zincludesr   r(   Zh_path	read_textZh_strr=   r9   r:   r%   itemsZ
algo_declsZ
meta_listsrd   Zget_num_algos_declZglobal_decloutwith_suffixopenfpr5   writeZdefskeysr   Zfunc_pointers_defZmeta_const_defZload_unload_defZget_num_algos_defZdefault_algo_kernelr   r   r   r
   <module>   s   ]	
)	





