U
    hD                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZ dZedkreed	Zejd
dd ejddedddd ejddeddd ejddeddd ejddeddd ejdd edd!d ejd"d#ed$dd% ejd&d'ed(dd% e Zejr.ejnejZejrBejneeZeejZejd eej ej ej!eZ"ej#e"Z$e"j%&e$ e'e$ejZ(ej)*d)Z)e+e)dkst,e-e.d*d+ ej/*d)Z/e
e d,d-d.Z0d/ej1 d0ej2 Z3e0e/e3g Z4d1d2 Z5d3d4 e6e/D Z7d5d4 e78 D Z7d6d4 e6e/D Z9d7d4 e98 D Z9d8d4 e6e/D Z/d9:d:d; e9; D Z<d<d; e9= D Z>e>d=ej1 d>ej2 g7 Z>e7; D ]Z?e?d?kst,d@e? qdAd; e78 D Z@dBd; e78 D ZAejBjCe@eAdCZDeAD ]ZEe9FeEdi qejBjGe(e9e/eDdDZHej1ej2dEZIejJeHeIdFZKg ZLg ZMe/= D ].ZEeEeAkrfeLe(jLeE g7 ZLeMe/eE g7 ZMqfee/; eDZNdG:ee4eNgZOee PeKjQdH dIdJ ZReOeje+eRdK:dLd; eSeRdddI eRdddI D dK:dMd; eSeLeMD dK:dNd; e/= D dK:dOd; eLD e+eLe>eKjTjUej1dG:e<e3ge)d  e)d e)dI ddPZVdQD ]bZWeeXjdReW  ZYeZdSe4 dGeN dSeW [dT Z\e\]eeY^ j_f eV W 5 Q R X qdS )U    N)ArgumentParser)Path)List)kernel_suffix	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r   r   z--gridz-gzLaunch grid of the kernel,c                 C   s
   |  dS )N )strip)s r   F/var/www/html/venv/lib/python3.8/site-packages/triton/tools/compile.py<lambda>F       r   )	signaturec                 C   s,   t  }|d|   | d d S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   mr   r   r   hash_signatureH   s    r#   ZwarpsZxstagesc                 C   sL   zt | }|W S  tk
r"   Y nX zt| }|W S  tk
rF   Y nX d S N)int
ValueErrorfloat)r   retr   r   r   	constexprP   s    r)   c                 C   s,   i | ]$\}}d |kr|t |d d qS ):r   )r)   split.0ir   r   r   r   
<dictcomp>]   s       r/   c                 C   s   i | ]\}}|d k	r||qS r$   r   r-   kvr   r   r   r/   ^   s       c                 C   s   i | ]\}}|t |qS r   )r)   r,   r   r   r   r/   _   s      c                 C   s   i | ]\}}|d k	r||qS r$   r   r0   r   r   r   r/   `   s       c                 C   s(   i | ] \}}|t kr||d d qS )r*   r   )	constantsr+   r,   r   r   r   r/   a   s       xc                 C   s   g | ]}t |qS r   )str)r-   r2   r   r   r   
<listcomp>b   s     r6   c                 C   s$   g | ]}t j|  d t|  qS )=)kernel	arg_namesr3   r-   r.   r   r   r   r6   c   s     z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got c                 C   s   g | ]\}}|d kr|qS )r;   r   r-   r.   hr   r   r   r6   i   s      c                 C   s   g | ]\}}|d kr|qS )r   r   r<   r   r   r   r6   j   s      )divisible_by_16
equal_to_1)fnr3   r   attrs)	num_warps
num_stages)options_Zcubin   z, c                 C   s   g | ]\}}d | | qS )0xr   )r-   r4   yr   r   r   r6      s     c                 C   s"   g | ]\}}t | d | qS r   r   )r-   nametyr   r   r   r6      s     c                 C   s(   g | ] }t t|  d tj|  qS rJ   )r   r   r8   r9   r:   r   r   r   r6      s     c                 C   s   g | ]}d | qS )&r   )r-   argr   r   r   r6      s     )kernel_nameZtriton_kernel_nameZbin_sizeZbin_datar   Zfull_signatureZarg_pointersnum_argsZkernel_docstringsharedrB   Z	algo_infoZgridXZgridYZgridZZ_placeholder)r=   czcompile..w)`binasciir   importlib.util	importlibsysargparser   pathlibr   typingr   ZtritonZtriton.compiler.code_generatorr   Ztriton.backends.nvidia.driverr   desc__name__parseradd_argumentr5   r%   
parse_argsargsZout_namerO   Zout_pathr
   Zarg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrr8   gridr+   lenAssertionErrorlistmapr   r#   rB   rC   Zmeta_sigZsig_hashr)   	enumeratehintsitemsr3   r   valuesZ	const_sigkeysZ
doc_stringr=   r>   r?   compilerZAttrsDescriptorrA   r.   r   Z	ASTSourcesrcoptscompileZccinfor9   Z	arg_typessuffix	func_namehexlifyasmZhex_zipmetadatarQ   paramsext__file__Ztemplate_pathwith_suffixopenfpwrite	read_textformatr   r   r   r   <module>   s   



,$