U
    hL+                     @   s   d dl mZmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ e	ddG d	d
 d
ZG dd deZdS )    )BaseBackend	GPUTarget)irpassesllvmamd)	dataclass)AnyTupleN)PathT)frozenc                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	e
ed	< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dd Zdd  ZdS )!
HIPOptions   	num_warps   waves_per_eur   
num_stagesnum_ctasNextern_libs)r   r   r   cluster_dimsFdebugarchallow_fp8e4nvallow_fp8e4b15ieeedefault_dot_input_precision)r   allowed_dot_input_precisionsTenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namec                 C   s   t tjd }| jd kri nt| j}d| jks:d| jkr>dnd}t| d| ddg}|D ]}t|| d	 ||< q\t| d
t	|
  | jdkr| j| jd @ dkstdd S )NlibZgfx10Zgfx11    @   	warp_sizeZocmlZocklz.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictr   object__setattr__strtupleitemsr   AssertionError)selfZdefault_libdirr   r'   Zlibsr$    r2   N/var/www/html/venv/lib/python3.8/site-packages/triton/backends/amd/compiler.py__post_init__#   s     zHIPOptions.__post_init__c                 C   s.   d dd | j D }t|d S )N_c                 S   s   g | ]\}}| d | qS )-r2   ).0namevalr2   r2   r3   
<listcomp>1   s     z#HIPOptions.hash.<locals>.<listcomp>utf-8)join__dict__r/   hashlibsha256encode	hexdigest)r1   keyr2   r2   r3   hash0   s    zHIPOptions.hash)__name__
__module____qualname__r   int__annotations__r   r   r   r   r*   r   r.   r   boolr   r-   r   r   r   r   r
   r   r   r   r    r!   r#   r4   rC   r2   r2   r2   r3   r      s(   
r   c                       s   e Zd ZeedddZedd fddZedd	d
Zdd Z	dd Z
dd Zedd Zedd Zedd Zedd Zedd Zedd Zdd Ze dd  Z  ZS )!
HIPBackendtargetc                 C   s
   | j dkS )Nr"   )backendrK   r2   r2   r3   supports_target7   s    zHIPBackend.supports_targetN)rL   returnc                    s&   t  | t|jtstd| _d S )Nhsaco)super__init__
isinstancer   r-   r0   Z
binary_ext)r1   rL   	__class__r2   r3   rR   ;   s    zHIPBackend.__init__)rO   c                    s4   d| j ji}| fddtj D  tf |S )Nr   c                    s   i | ]}| kr| | qS r2   r2   )r7   koptsr2   r3   
<dictcomp>B   s       z,HIPBackend.parse_options.<locals>.<dictcomp>)rL   r   updater   __dataclass_fields__keys)r1   rX   argsr2   rW   r3   parse_options@   s    zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r      )r   r   sharedr   )r1   metadatar2   r2   r3   pack_metadataE   s    zHIPBackend.pack_metadatac                 C   s
   t  }|S N)r*   )r1   Zcodegen_fnsr2   r2   r3   get_codegen_implementationO   s    z%HIPBackend.get_codegen_implementationc                 C   s   t | d S rc   )r   load_dialects)r1   ctxr2   r2   r3   re   S   s    zHIPBackend.load_dialectsc                  C   st   t d} | d k	r&t| }| r&|S ttjd }| r@|S td}| rT|S td}| rh|S tdd S )NZTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldz/ROCm linker /opt/rocm/llvm/bin/ld.lld not found)osgetenvr   is_filer(   r)   	Exception)Zlld_env_pathZlldr2   r2   r3   path_to_rocm_lldV   s    
zHIPBackend.path_to_rocm_lldc                 C   s   t | j}|  tj| tj| tj	| tj
| tj| tj| tj| tj| ||  | S rc   )r   pass_managercontextenable_debugr   commonZadd_inlinerttirZadd_rewrite_tensor_pointerZadd_combineadd_canonicalizerZadd_reorder_broadcastadd_cseZadd_licmadd_symbol_dcerunmodra   optionspmr2   r2   r3   	make_ttirj   s    
zHIPBackend.make_ttirc                 C   sP  t | j}|  tj|d|j |j|j	|j
 ||  t | j}|  tj| tj| tj| tjj||j|j|j tj| tjj| tj|d |jdkrt|jrtjj| tj| tj|d tj| tj| |jdkr*tjj| tj| tj| ||  | S )Nzhip:Tr   )r   rl   rm   rn   r   rp   Zadd_convert_to_ttgpuirr   r   r'   r   rt   ttgpuirZadd_coalesceZadd_remove_layout_conversionsZadd_optimize_thread_localityr   Zadd_accelerate_matmulr   r   Zadd_optimize_epilogueZadd_optimize_dot_operandsr   Zhas_matrix_core_featureZadd_stream_pipelinero   rq   Zadd_reduce_data_duplicationZadd_reorder_instructionsrr   rs   ru   r2   r2   r3   
make_ttgiry   s6    

zHIPBackend.make_ttgirc           
         s2  | }t |j}|  tjj||j tj	
| tj	| tj| d}tjj||j| tj| tj| tj	| tj	| tj| tj| tj| tjdddkrtj| tjj| || t  t }t|| t |j t d t  dd t  dd t  dd t  d	|j!d
k dd  " D }|d #tj$ |d %dd|j&|j!   |d %d|j'  |j(rdnd}|d %d| |j)r  fdd|j)D }	t* |	 t+ tj,tj- | .d|d< t/  t0 S )NTZTRITON_DISABLE_LINE_INFO0i  Z__oclc_finite_only_optFZ__oclc_correctly_rounded_sqrt32Z__oclc_unsafe_math_optZ__oclc_wavefrontsize64r&   c                 S   s   g | ]}|  s|qS r2   )Zis_declaration)r7   fnr2   r2   r3   r:      s      z(HIPBackend.make_llir.<locals>.<listcomp>r   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr   zdenormal-fp-math-f32c                    s    g | ]\}}t  |r|qS r2   )r   Zneed_extern_lib)r7   r8   pathZllvm_modr2   r3   r:      s      ztriton_gpu.sharedr`   )1r   rl   rm   rn   r   r   rz   Z%add_decompose_unsupported_conversionsr   convertZadd_scf_to_cfZadd_index_to_llvmirZadd_allocate_shared_memoryZadd_to_llvmirro   rq   rr   Zadd_cf_to_llvmirZadd_arith_to_llvmirrs   rg   environgetZllvmirZadd_di_scopeZadd_builtin_func_to_llvmirrt   r   Zinit_targetsZ	to_moduleZset_isa_versionZset_abi_versionZset_bool_control_constantr'   Zget_functionsZset_calling_convZCALLING_CONV_AMDGPU_KERNELZadd_fn_attrr   r   r    r   Zlink_extern_libsZoptimize_moduleZOPTIMIZE_O3TARGET_TRIPLEZget_int_attrZcleanup_bitcode_metadatar-   )
srcra   rw   rv   rx   Z_HIPBackend__HIP_FTZrm   ZfnsZdenormal_modepathsr2   r   r3   	make_llir   sT    

zHIPBackend.make_llirc              	   C   sj   t d| }t|dkst|d |d< t| tj|jdg |j	d}t
jddd	krftd
 t| |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   r8    FZAMDGCN_ENABLE_DUMPr|   1z!// -----// AMDGCN Dump //----- //)refindalllenr0   r   Ztranslate_to_asmr   r   r   r   rg   r   r   print)r   ra   rw   namesamdgcnr2   r2   r3   make_amdgcn   s    zHIPBackend.make_amdgcnc           
      C   s   t | |jd}t }t x}t D}t|jd}|	| W 5 Q R X t
|ddd|jd|jg W 5 Q R X t|jd}| }	W 5 Q R X W 5 Q R X |	S )Nr   wbz-flavorZgnuz-sharedz-orb)r   Zassemble_amdgcnr   rJ   rk   tempfileNamedTemporaryFileopenr8   write
subprocess
check_callread)
r   ra   rw   rP   Z	rocm_pathZtmp_outZtmp_inZfd_inZfd_outretr2   r2   r3   
make_hsaco   s    

&zHIPBackend.make_hsacoc                    s^    fdd|d<  fdd|d<  fdd|d<  fdd|d	<  fd
d|d< d S )Nc                    s    | | S rc   )ry   r   ra   rw   r1   r2   r3   <lambda>       z'HIPBackend.add_stages.<locals>.<lambda>rp   c                    s    | | S rc   )r{   r   r   r2   r3   r      r   Zttgirc                    s    | | S rc   )r   r   r   r2   r3   r      r   Zllirc                    s    | | S rc   )r   r   r   r2   r3   r      r   r   c                    s    | | S rc   )r   r   r   r2   r3   r     r   rP   r2   )r1   Zstagesrw   r2   r   r3   
add_stages   s
    zHIPBackend.add_stagesc                 C   s&   t jt dgdd}| d| j S )Nz	--versionr;   )encodingr6   )r   check_outputrJ   rk   rL   )r1   versionr2   r2   r3   rC     s    zHIPBackend.hash)rD   rE   rF   staticmethodr   rN   rR   r	   r^   rb   rd   re   rk   ry   r{   r   r   r   r   	functools	lru_cacherC   __classcell__r2   r2   rT   r3   rJ   5   s,   




H

rJ   )Ztriton.backends.compilerr   r   Ztriton._C.libtritonr   r   r   r   dataclassesr   typingr	   r
   r>   r   rg   r   r   r   pathlibr   r   rJ   r2   r2   r2   r3   <module>   s   &