U
    h                    @  s  d dl mZ d dlmZmZmZmZmZ ddlm	Z	 ddl
mZ ddl
mZ edZG d	d
 d
eZddddddZddddddZddddddZdddddddZdddddddZddddd!d"d#d$Zddddd%d&d'Zddddd%d(d)Zddddd%d*d+Zddddd%d,d-Zddddd%d.d/Zdddddd0d1d2Zddddd%d3d4Zddd5dd6d7d8Zddd5dd6d9d:Zdddd5dd;d<d=Z dddd!d%d>d?Z!ddddd%d@dAZ"ddddd%dBdCZ#ddddd%dDdEZ$ddddd%dFdGZ%ddddd%dHdIZ&dddJdKdLZ'ddddd%dMdNZ(ddddd%dOdPZ)ddddd%dQdRZ*dddSdTdUZ+ddddVdWdXZ,ddddVdYdZZ-dd[d\d]d^Z.ddddd%d_d`Z/ddddd%dadbZ0ddddd%dcddZ1ddddd%dedfZ2ddddd%dgdhZ3ddddd%didjZ4dddddkdldmZ5dnddddodpdqZ6ddndddrdsdtZ7ddnddddudvdwZ8dddddxdydzZ9dddddd{d|d}Z:ddddd~ddZ;ddd!dddZ<dddddddZ=ddndddddZ>ddddd"ddZ?ddddZ@dddddddZAdddddddddZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKddddddddddd
ddZLddddddddZMdddddddZNdd ZOdd ZPddddddddddZQddddddddddZRdddddddddÄZSddddddddĜddƄZTddddddddĜddȄZUddddddddĜddʄZVddddddddĜdd̄ZWddddddddĜdd΄ZXddddddddĜddЄZYddddddddĜdd҄ZZddԄ Z[ddddddddd՜ddׄZ\dddddd؜ddڄZ]dd܄ Z^dddddߜddZ_ddddddddZ`dddddddZaddnddddZbddnddddZcddnddddZddddddZeddddddddZfdddddddddZgdd ZhdddZiddddddZjddddd dZkdS (      )annotations)ListOptionalSequenceTupleTypeVar   )ir   )core)mathTc                      s   e Zd Z fddZ  ZS )IncompatibleTypeErrorImplc                   s@   || _ || _d| j   d | j  | _tt| | j d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__ J/var/www/html/venv/lib/python3.8/site-packages/triton/language/semantic.pyr      s    z"IncompatibleTypeErrorImpl.__init__)__name__
__module____qualname__r   __classcell__r   r   r   r   r      s   r   intz
ir.builderz	tl.tensor)axisbuilderreturnc                 C  s*   | dkrt d|  t|| tjS )Nr   r
   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorZcreate_get_program_idint32r    r!   r   r   r   
program_id   s    r)   c                 C  s*   | dkrt d|  t|| tjS )Nr#   z-num_programs axis must be 0, 1, or 2 but got )r$   r%   r&   Zcreate_get_num_programsr'   r(   r   r   r   num_programs    s    r*   ztl.dtype)a_tyb_tyr"   c                 C  s   | j }|j }| j}|j}||kr0||kr,| S |S |tjjjkrN||krJ| S |S |tjjjkrl||krh|S | S td| d| d S )Nzunexpected signedness r   )int_bitwidthint_signednessr%   dtypeZ
SIGNEDNESSZUNSIGNED	TypeError)r+   r,   Za_rankZb_rankZa_snZb_snr   r   r   integer_promote_impl+   s    r1   bool)r+   r,   
div_or_modr"   c                 C  s   |   s|  rtjS |  s&| r,tjS |  s<| rL|rFtjS tjS |  s\| r|rftjS |  r|| r|tjS tjS | 	 r|	 st
d|  d| |r| j|jkrt
d|   d |  d t| |S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)Zis_fp64r%   float64is_fp32float32is_fp16float16is_bf16bfloat16is_intr0   r.   r   r1   )r+   r,   r3   r   r   r   computation_type_impl;   s*    r>   None)r   r   allow_ptr_ar"   c                 C  sF   |   rB|st| ||  r0| |kr0t| || rBt| |d S N)is_ptrr   is_floating)r   r   r@   r   r   r   check_ptr_type_implc   s    

rD   FTzTuple[tl.tensor, tl.tensor])lhsrhsr!   r"   c           
      C  sx   t | ||\} }| jj}|jj}t||| t||| |rp| sp| spt|||}	t| |	|} t||	|}| |fS rA   )broadcast_impl_valuetypescalarrD   rB   r>   cast)
rE   rF   r!   Zallow_lhs_ptrZallow_rhs_ptrZarithmetic_checkr3   Z
lhs_sca_tyZ
rhs_sca_ty
ret_sca_tyr   r   r   binary_op_type_checking_implo   s    rL   )inputotherr!   r"   c                 C  s   t | ||dd\} }| jj}|jj}| r<| r<td| rf| sf||  } }| jj}|jj}| rt|| j|j| jS |	 rt|
| j|j| jS | rt|| j|j| jS td| d S )NTzcannot add pointers togetherr4   )rL   rH   rI   rB   r0   r%   r&   create_addptrhandlerC   Zcreate_faddr=   Z
create_addrM   rN   r!   input_scalar_tyother_scalar_tyr   r   r   add   s     
rT   c                 C  s   t | ||dd\} }| jj}| rDt|| jt||j| jS |	 rft|
| j|j| jS | rt|| j|j| jS td| d S )NTFr4   )rL   rH   rI   rB   r%   r&   rO   rP   minusrC   Zcreate_fsubr=   Z
create_subr0   rM   rN   r!   	scalar_tyr   r   r   sub   s     rX   c                 C  sn   t | ||\} }| jj}| r:t|| j|j| jS | r\t|	| j|j| jS t
d| d S Nr4   )rL   rH   rI   rC   r%   r&   Zcreate_fmulrP   r=   Z
create_mulr0   rV   r   r   r   mul   s    rZ   c                 C  s   t | ||dddd\} }| jj}|jj}| rF| rFt|||}n| rd| rdt| ||} nt| r| rt| tj|} t|tj|}nF| r| r|j|jkrt|||}qt| ||} nt	d| t
|| j|j| jS NFTr4   )rL   rH   rI   rC   r=   rJ   r%   r8   Zfp_mantissa_widthr0   r&   create_fdivrP   rQ   r   r   r   truediv   s     r]   c                 C  s   t | ||dddd\} }| jj}|jj}| r| rt||}t| ||} t|||}| r|t|	| j
|j
| jS t|| j
|j
| jS td| d S r[   )rL   rH   rI   r=   r1   rJ   is_int_signedr%   r&   Zcreate_sdivrP   Zcreate_udivr0   )rM   rN   r!   rR   rS   ret_tyr   r   r   floordiv   s    
r`   )rM   rN   ieee_roundingr!   r"   c                 C  s^   | j j}|j j}| r | s(tdt| ||dddd\} }|| j|j}t|| j S )Nz4both operands of fdiv must have floating scalar typeFT)	rH   rI   rC   r0   rL   r\   rP   r%   r&   )rM   rN   ra   r!   rR   rS   retr   r   r   fdiv   s    rc   c              	   C  s   t | ||dddd\} }| jj}|jj}| r\t| ttjt| |d||d|||}|S |	 r|j
|j
krtd|  d |  d | rt|| j|j| jS t|| j|j| jS td| d S )NFTZ_builderzCannot mod z by r5   r4   )rL   rH   rI   rC   rX   rZ   r   floorrc   r=   r.   r0   r   r^   r%   r&   Zcreate_sremrP   Zcreate_urem)rM   rN   r!   rW   rS   rb   r   r   r   mod   s    ( rf   ztl.PropagateNan)xypropagate_nanr!   c                 C  s   t | ||\} }| j}| rz|tjjkrDt|| j|j| j	S |tjj
krjt|| j|j| j	S td| nR| rt|| j|j| j	S | rt|| j|j| j	S td| d S NzUnexpected propagate_nan Unexpected dtype )rL   r/   rC   r%   PropagateNanALLr&   Zcreate_minimumfrP   rH   NONEZcreate_minnumfr$   r^   Zcreate_minsiis_int_unsignedZcreate_minuir0   rg   rh   ri   r!   r/   r   r   r   minimum  s    rq   c                 C  s   t | ||\} }| j}| rz|tjjkrDt|| j|j| j	S |tjj
krjt|| j|j| j	S td| nR| rt|| j|j| j	S | rt|| j|j| j	S td| d S rj   )rL   r/   rC   r%   rl   rm   r&   Zcreate_maximumfrP   rH   rn   Zcreate_maxnumfr$   r^   Zcreate_maxsiro   Zcreate_maxuir0   rp   r   r   r   maximum  s    rr   )rg   minmaxri   r!   c                 C  sr   t |||\}}t | ||\} }t | ||\} }| j}| r^t|| j|j|j|| jS td| dd S )Nrk   z(. Only floating point clamp is supported)	rL   r/   rC   r%   r&   Zcreate_clampfrP   rH   r0   )rg   rs   rt   ri   r!   r/   r   r   r   clamp%  s     ru   c                 C  sz   t | ||ddd\} }| jj}|jj}| r6| s@t||t||}||kr^t| ||} ||krrt|||}| |fS )NF)rL   rH   rI   r=   r   r1   rJ   )rM   rN   r!   input_sca_tyZother_sca_tyrK   r   r   r   bitwise_op_type_checking_impl6  s    

rw   c                 C  s*   t | ||\} }t|| j|j| jS rA   )rw   r%   r&   Z
create_andrP   rH   rM   rN   r!   r   r   r   and_E  s    ry   c                 C  s*   t | ||\} }t|| j|j| jS rA   )rw   r%   r&   Z	create_orrP   rH   rx   r   r   r   or_J  s    rz   c                 C  s*   t | ||\} }t|| j|j| jS rA   )rw   r%   r&   Z
create_xorrP   rH   rx   r   r   r   xor_O  s    r{   c                 C  sD   | j  st| td|} |j  s8t|td|}t| ||S Nint1)rH   is_int1bitcastr%   r/   ry   rx   r   r   r   logical_andT  s
    

r   c                 C  sD   | j  st| td|} |j  s8t|td|}t| ||S r|   )rH   r~   r   r%   r/   rz   rx   r   r   r   
logical_or\  s
    

r   rM   r!   c                 C  s&   | j  st| td|} t| |S r|   )rH   r~   r   r%   r/   invertr   r   r   r   not_d  s    
r   c                 C  s*   t | ||\} }t|| j|j| jS rA   )rw   r%   r&   Zcreate_lshrrP   rH   rx   r   r   r   lshrj  s    r   c                 C  s*   t | ||\} }t|| j|j| jS rA   )rw   r%   r&   Zcreate_ashrrP   rH   rx   r   r   r   ashro  s    r   c                 C  s*   t | ||\} }t|| j|j| jS rA   )rw   r%   r&   Z
create_shlrP   rH   rx   r   r   r   shlt  s    r   )rM   r"   c                 C  s   | S rA   r   rM   r   r   r   plus~  s    r   )rM   r!   r"   c                 C  sH   | j j}| r$td|  d t||||}t	|| |S )Nz$wrong type argument to unary minus ())
rH   rI   rB   r$   r   r%   r&   get_null_valueto_irrX   )rM   r!   rv   _0r   r   r   rU     s
    rU   c                 C  sP   | j j}| s| r,td|  d t||	||}t
| ||S )Nz%wrong type argument to unary invert (r   )rH   rI   rB   rC   r$   r   r%   r&   Zget_all_ones_valuer   r{   )rM   r!   rv   Z_1r   r   r   r     s
    r   ztl.block_type)vr"   c                 C  s&   | j  stjS | j j}ttj|S rA   )rH   is_blockr%   r}   shape
block_type)r   r   r   r   r   
_bool_like  s    
r   c                 C  s   t | ||\} }| jj}| r<t|| j|jt| S |	 r|
 rht|| j|jt| S t|| j|jt| S td| d S rY   )rL   rH   rI   rC   r%   r&   Zcreate_fcmpOGTrP   r   r=   r^   Zcreate_icmpSGTZcreate_icmpUGTr0   rV   r   r   r   greater_than  s    r   c                 C  s   t | ||\} }| jj}| r<t|| j|jt| S |	 r|
 rht|| j|jt| S t|| j|jt| S td| d S rY   )rL   rH   rI   rC   r%   r&   Zcreate_fcmpOGErP   r   r=   r^   Zcreate_icmpSGEZcreate_icmpUGEr0   rV   r   r   r   greater_equal  s    r   c                 C  s   t | ||\} }| jj}| r<t|| j|jt| S |	 r|
 rht|| j|jt| S t|| j|jt| S td| d S rY   )rL   rH   rI   rC   r%   r&   Zcreate_fcmpOLTrP   r   r=   r^   Zcreate_icmpSLTZcreate_icmpULTr0   rV   r   r   r   	less_than  s    r   c                 C  s   t | ||\} }| jj}| r<t|| j|jt| S |	 r|
 rht|| j|jt| S t|| j|jt| S td| d S rY   )rL   rH   rI   rC   r%   r&   Zcreate_fcmpOLErP   r   r=   r^   Zcreate_icmpSLEZcreate_icmpULEr0   rV   r   r   r   
less_equal  s    r   c                 C  sr   t | ||\} }| jj}| r<t|| j|jt| S |	 r`t|
| j|jt| S td| d S rY   )rL   rH   rI   rC   r%   r&   Zcreate_fcmpOEQrP   r   r=   Zcreate_icmpEQr0   rV   r   r   r   equal  s    r   c                 C  sr   t | ||\} }| jj}| r<t|| j|jt| S |	 r`t|
| j|jt| S td| d S rY   )rL   rH   rI   rC   r%   r&   Zcreate_fcmpUNErP   r   r=   Zcreate_icmpNEr0   rV   r   r   r   	not_equal  s    r   )startendr!   r"   c                 C  s   t | trt |tstdt| d? }t|d? }|s<|rDtd|| krTtd||  }||d @ dkrttd|g}ttj|}t|| ||S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr
   r   z#arange's range must be a power of 2)	
isinstancer   r$   r2   r%   r   r'   r&   Zcreate_make_range)r   r   r!   Zis_start_int64Zis_end_int64ranger   r_   r   r   r   arange  s    r   z	List[int])r   r/   r!   r"   c                 C  s   t |tjr.|jjdks tdt|||}nP|d kr>td|dkrX||	|}nt
|d|j }||}t||}t|| |S )Nr
   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)r   r%   r&   numelvalueAssertionErrorrJ   r$   r   r   getattrnamesplat)r   r   r/   r!   Zget_value_fnr   r   r   full  s    r   )r   r   r!   r"   c                 C  sF   | j  rtdt|dkr"| S t| j|}t|| j	||S )NzCannot splat a block tensorr   )
rH   r   r   lenr%   r   r/   r&   create_splatrP   )r   r   r!   r_   r   r   r   r     s
    r   )rM   	dst_shapecan_reorderr!   r"   c                 C  sR   d}|D ]}||9 }q| j j|kr*tdt| j j|}t|| j|||S )Nr
   z:reshape() cannot change total number of elements in tensor)	rH   r   r$   r%   r   rI   r&   Zcreate_reshaperP   )rM   r   r   r!   r   sr_   r   r   r   reshape%  s    
r   )rM   r    r!   r"   c                 C  sZ   dd | j D }||d | j s4t| ||dS t| jj|}t|	| j
||S )Nc                 S  s   g | ]}t |qS r   r%   _constexpr_to_value).0rg   r   r   r   
<listcomp>0  s     zexpand_dims.<locals>.<listcomp>r
   )r   r!   )r   insertrH   r   r   r%   r   rI   r&   create_expand_dimsrP   )rM   r    r!   r   r_   r   r   r   expand_dims/  s    
r   )rE   rF   r   r!   r"   c                 C  sX   |st dt| jdkst t| jj| jd |jd  g}t|| j	|j	|S )Nz;current implementation of `cat` always may reorder elementsr
   r   )
r   r   r   r%   r   rH   rI   r&   Z
create_catrP   )rE   rF   r   r!   ret_typer   r   r   cat:  s    "r   )abr!   r"   c                 C  s   t | ||\} }| jg k}|r6t| d|} t|d|}t| jd tjrTtd}nd}| j|g }t| jj|}t	|
| j|j|}|rt|dgd|d}|S )Nr   r   Fr   r!   )rG   r   r   r   r%   	constexprr   rH   rI   r&   Zcreate_joinrP   r   )r   r   r!   Z
was_rank_1two	new_shaper   rb   r   r   r   joinA  s    
r   )r   r!   r"   c                 C  sp   t | jdkstt| jd dks*t| jd d }t| jj|}|| j	\}}t
||t
||fS )Nr   r   r   )r   r   r   r%   r   r   rH   rI   Zcreate_splitrP   r&   )r   r!   r   r   ZoutLHSZoutRHSr   r   r   splitZ  s    

r   z
Tuple[int])rM   dimsr!   r"   c                   s~   t  jt |krtdtdd |D ttt |krJtd| t jj	 fdd|D }t
| j||S )Nz5permute dims must have the same length as input shapec                 s  s   | ]}t |V  qd S rA   r   r   dr   r   r   	<genexpr>j  s     zpermute.<locals>.<genexpr>z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                   s   g | ]} j | qS r   r   r   r   r   r   r   m  s     zpermute.<locals>.<listcomp>)r   r   r$   sortedlistr   r%   r   rH   rI   r&   Zcreate_transrP   )rM   r   r!   r   r   r   r   permuteg  s    "r   )rM   r   r!   r"   c                 C  s   | j  s.t| j |}t|| j||S | j  }t|t|kr\t	d| d| ||krh| S t
|D ]F\}}|| |krp|dkrpt	d||  d| d| d| d| 
qpt| j j|}t|| j||S )Nz!Cannot broadcast, rank mismatch: , r
   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rH   r   r%   r   r&   r   rP   get_block_shapesr   r$   	enumeraterI   create_broadcast)rM   r   r!   r_   Z	src_shapeiitemr   r   r   broadcast_impl_shapeq  s    

,r   c              	   C  sd  | j }|j }| rJ| sJt|j|j}t||j|	 |}n| s| rt|j|j}t|| j|	 |} n| r\| r\|	 }|	 }t
|t
|k rtt
|t
|D ]8}t|| jdt|jdg| } | j }|	 }qn`t
|t
|k rltt
|t
|D ]:}t||jdt|jdg| }|j }|	 }q0t
|t
|kstg }t|D ]p\}	}
||	 }|
dkr|| nH|dks||
kr||
 n(tdt|	 d t|
 d t| q||kr.t|j|}t|| j||} ||kr\t|j|}t||j||}| |fS )Nr   r
   z?Cannot make_shape_compatible: incompatible dimensions at index r   r   )rH   r   r%   r   rI   r   r&   r   rP   r   r   r   r   r   r   appendr$   strr   )rE   rF   r!   Zlhs_tyZrhs_tyZ	lhs_shapeZ	rhs_shape_	ret_shaper   leftrightr_   r   r   r   rG     sf    



rG   zOptional[str]Zrounding_modec                 C  s@   | d krd S | dkrt jjS | dkr,t jjS td|  dd S )NZrtneZrtzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r	   ROUNDING_MODERTNEZRTZr$   r   r   r   r   _str_to_rounding_mode  s    r   )rM   dst_tyr!   r"   c                 C  s   | j }| r"t|j| j  }||kr.| S |j}|j}| sJ| rVt| ||S |j}|j}||krt	dt
| d t
| t|| j|||S )Nz!Cannot bitcast data-type of size z to data-type of size )rH   r   r%   r   rI   r   rB   rJ   primitive_bitwidthr$   r   r&   create_bitcastrP   r   )rM   r   r!   src_ty
src_sca_ty
dst_sca_tyZsrc_bitsZdst_bitsr   r   r   r     s     r   N)rM   r   r!   fp_downcast_roundingr"   c                 C  s  | j }t|tjr|j}t|tjr*|j}| rFt|j| j  }||krR| S |j}|j}t	|}d}|
 r|
 r|j|jk r|d krtjj}q|tjjkrd}n$|d k	rtdt| d t| | s| r|jjstd| s| r2|jdd k	std|jd | |||dS | rF|
 s`|
 rZ| s`|r~t|| j||||S | r| r| r| st t | tj!|||S |
 o|
 o|j|jk}|rt|"| j|||S |
 o|
 o|j|jk }	|	r@t|#| j|||S |$ r|$ r|j%|j%ksp|j&|j&kr|' o|(  }
|( r| j)|}t|*|| j)}t+| ||S t|,| j|||
|S |- rd|$ rd|( r"| j)|}t|*|| j)}t+| ||S |' rHt|.| j|||S t|/| j|||S |$ r|- r|( s|' st|0| j|||S t|1| j|||S |2 r4|$ r4|j%}|d	krt|3| j|||S |d
kr4t+t | tj4|t|5dtj4|S |$ rd|2 rdt|6| j|||S |2 r|2 rt|7| j|||S dstd|  d| d S )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is z4fp8e4nv data type is not supported on CUDA arch < 89Zconvert_custom_typesz0target doesn't provide conversion for this type.rd   @   r
   r   zcannot cast z to )8rH   r   r%   r   r   r   r   rI   r   r   rC   r   r	   r   r   r$   r   
is_fp8e4nvoptionsallow_fp8e4nvr   is_fp8e4b15Zcodegen_fnsgetis_fp8r&   Zcreate_fp_to_fprP   r   r9   r7   r;   rJ   r8   Zcreate_fp_truncZcreate_fp_extr=   r-   r.   r^   is_boolr/   r   r   create_int_castZis_standard_floatingZcreate_fp_to_siZcreate_fp_to_uiZcreate_ui_to_fpZcreate_si_to_fprB   Zcreate_ptr_to_intint64	get_int64Zcreate_int_to_ptrr   )rM   r   r!   r   r   r   r   Zuse_custom_roundingZtruncate_fpZext_fpZsign_extendtyr   Zbitwidthr   r   r   rJ     s    
 
 










&rJ   c                 C  sD   t jj}| r@| dkrt jj}n"| dkr0t jj}ntd|  d|S )Nz.ca.cgCache modifier  not supported)r	   CACHE_MODIFIERrn   CACGr$   cache_modifiercacher   r   r   _str_to_load_cache_modifierQ  s    

r   c                 C  sh   t jj}| rd| dkrt jj}nF| dkr0t jj}n4| dkrBt jj}n"| dkrTt jj}ntd|  d|S )Nz.wbr   z.csz.wtr   r   )r	   r   rn   ZWBr   CSZWTr$   r   r   r   r   _str_to_store_cache_modifier]  s    



r   c                 C  sD   t jj}| r@| dkrt jj}n"| dkr0t jj}ntd|  d|S )NZ
evict_lastZevict_firstzEviction policy r   )r	   ZEVICTION_POLICYZNORMALZ
EVICT_LASTZEVICT_FIRSTr$   )eviction_policyevictionr   r   r   _str_to_eviction_policym  s    

r   c                 C  s@   d }| r<| dkrt jj}n"| dkr,t jj}ntd|  d|S )NzeronanzPadding option r   )r	   PADDING_OPTIONZPAD_ZEROPAD_NANr$   )padding_optionpaddingr   r   r   _str_to_padding_optiony  s    

r  c                 C  sh   t jj}| rd| dkrt jj}nF| dkr0t jj}n4| dkrBt jj}n"| dkrTt jj}ntd|  d|S )NacquirereleaseZacq_relrelaxedMemory semantic r   )r	   ZMEM_SEMANTICZACQUIRE_RELEASEZACQUIREZRELEASEZRELAXEDr$   )Z
sem_optionsemr   r   r   _str_to_sem  s    



r  c                 C  sV   t jj}| rR| dkrt jj}n4| dkr0t jj}n"| dkrBt jj}ntd|  d|S )NZgpuZctasysr  r   )r	   ZMEM_SYNC_SCOPEZGPUZCTAZSYSTEMr$   )Zscope_optionscoper   r   r   _str_to_scope  s    


r
  c                 C  s   | rt | ds| g} dd | D } | D ],}t|trNd|  krLt|k s&n tq&t| dksdtt| tt| kstdt| S dS )N__iter__c                 S  s"   g | ]}t |tjr|jn|qS r   r   r%   r   r   r   elemr   r   r   r     s     z0_canonicalize_boundary_check.<locals>.<listcomp>r   z'Duplicate dimension in `boundary_check`r   )hasattrr   r   r   r   setr   )boundary_checkblock_shapedimr   r   r   _canonicalize_boundary_check  s    
*r  c	              
   C  s   |d k	s|d k	rt d| jjj}	|	tjks4td|	 rP|tjj	krPt d| jj}
t
||
 }t|| j||||||
S )NK`mask` and `other` arguments cannot be specified for loading block pointers3`tl.int1` should be rewrited in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r$   rH   
element_tyr%   r}   r   r=   r	   r   r   r  r   r&   Zcreate_tensor_pointer_loadrP   )ptrmaskrN   r  r   r   r   is_volatiler!   elt_tyr   r   r   r   _load_block_pointer  s    
 r  c	              
   C  s  | j j s"td| j   d|d kr:|d k	r:td|sB|rJtd| j  s|rj|j  rjtd|r|j  rtd| j  r|d k	rt|| j  |}|d k	rt|| j  |}| j j}	|	j}
|
t	j
krt	j}
t	|
|	j}	t| |	|} |d k	rt||
|}| j  r0| j  }t	|
|}n|
}|d krXt	|| j||||S t	|| j|j|rt|jnd ||||S d S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rH   rI   rB   r$   r   r   r   r   r  r%   r}   int8pointer_typeaddress_spacerJ   r   r&   Zcreate_loadrP   Zcreate_masked_load)r  r  rN   r  r   r   r   r  r!   ptr_tyr  r   r   r   r   r   _load_legacy  sH    





r#  zOptional[tl.tensor]r   r   )
r  r  rN   r  r   r   r   r  r!   r"   c	              
   C  sb   t |}	t|}
t|}| j rF| jj rFt| |||||	|
||	S t| |||||	|
||	S d S rA   )	r   r   r  rH   rB   r  r   r  r#  )r  r  rN   r  r   r   r   r  r!   r   r   r   r   r   r   load   s    r$  )desc_ptrr   r   r!   r"   c                 C  s<   t ||dd}|| j|||t|t|}t||S NFrequire_i64)_convert_to_ir_valuesZcreate_descriptor_loadrP   r   r   r   r%   r&   )r%  offsetsr   r   rH   r!   rg   r   r   r   descriptor_load  s    r+  )r%  r   r!   r"   c                 C  s*   t ||dd}t|| j|j|tjS r&  )r)  r%   r&   Zcreate_descriptor_storerP   void)r%  r   r*  r!   r   r   r   descriptor_store  s    r-  c           	   	   C  s   |d k	rt d| jj }|j s2t|||}|j sDtd||j ksntd| d|j  d| jjj|jjkstd| jjj d|jj d| jjj}|tjkstdt	||}t
|||}t|| j|j|||tjS )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r$   rH   r  r   r   r   r   r%   r}   r  rJ   r&   Zcreate_tensor_pointer_storerP   r,  )	r  valr  r  r   r   r!   r  r  r   r   r   _store_block_pointer  s"    
2

r/  c           	   	   C  s4  | j j s"td| j   d|r.td| j  s`|j  rJtd|r`|j  r`td| j  rt|| j  |}|d k	rt|| j  |}| j j}|j}|t	j
krt	j}t	||j}t| ||} t|||}|st	|| j|j||t	jS |j j stdt	|| j|j|j||t	jS )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)rH   rI   rB   r$   r   r   r   r   r  r%   r}   r  r   r!  rJ   r&   Zcreate_storerP   r,  r   Zcreate_masked_store)	r  r.  r  r  r   r   r!   r"  r  r   r   r   _store_legacy;  s2    



r0  )r  r.  r  r   r   r!   r"   c           	      C  sp   t |}t|}| j s&| jj r.td| j rX| jj rXt	| ||||||S t
| ||||||S d S )N"Cannot store to a constant pointer)r   r   rH   is_constrI   r$   rB   r  r   r/  r0  )	r  r.  r  r  r   r   r!   r   r   r   r   r   storeg  s    r3  )r  cmpr.  r  r	  r!   r"   c              	   C  sN   t |}t|}| jjj}|jdkr,tdt|	| j
|j
|j
|||jS )N)   r   r   z9atomic_cas only supports elements with width {16, 32, 64})r  r
  rH   rI   r  r   r$   r%   r&   Zcreate_atomic_casrP   )r  r4  r.  r  r	  r!   r  r   r   r   
atomic_cas}  s    

r6  z&Tuple[tl.tensor, tl.tensor, tl.tensor])r  r.  r  opr!   r"   c                 C  sF  | j j std| j   | j  s4| j j r<td| j jj}|tjkrh|dkrhtd| d |tj	tj
tjtjfkrtd| d t| | j  r|d k	rt|| j  |}|d k	rt|| j  |}t|| j jj|}|s<|d}tj	}| j  r0||| j  }ttj	| j  }t||}| ||fS )Nz)Pointer argument of store instruction is r1  rT   Zatomic_z does not support fp16z does not support T)rH   rI   rB   r$   r   r2  r  r%   r:   r}   r  Zint16r<   r   r   r   r   rJ   Zget_int1r   r   r&   )r  r.  r  r7  r!   r  Zmask_irZmask_tyr   r   r   atom_red_typechecking_impl  s.    


r8  )r  r.  r  r  r	  r!   r"   c                 C  s  t | ||d|\} }}t|}t|}|jj}| r| rft|	t
jj| j|j|j|||jS t|	t
jj| j|j|j|||jS |tjtjhkrtd| tg d||}|tjkrtjntj}t|||}	t| t|d|}
|tjkrtjntj}t|||}t| t|d|}t|||}t|||}t|	t
jj|
j|	jt|||j|||	j}t|	t
jj|j|jt|||j|||j}t||||}t|||S )Nrt   z#atomic_max not supported for dtype         r
   )r8  r  r
  rH   rI   r=   r^   r%   r&   create_atomic_rmwr	   	ATOMIC_OPMAXrP   UMAXr8   r6   r0   r   r'   r   r   r   uint32uint64r   r   ry   UMINwherer  r.  r  r  r	  r!   sca_tyr   Zi_typeZi_valZi_ptrZui_typeZui_valZui_ptrposnegZpos_retZneg_retrb   r   r   r   
atomic_max  sV          rF  c                 C  s  t | ||d|\} }}t|}t|}|jj}| r| rft|	t
jj| j|j|j|||jS t|	t
jj| j|j|j|||jS |tjtjhkrtd| tg d||}|tjkrtjntj}t|||}	t| t|d|}
|tjkrtjntj}t|||}t| t|d|}t|||}t|||}t|	t
jj|
j|	jt|||j|||	j}t|	t
jj|j|jt|||j|||j}t||||}t|||S )Nrs   z#atomic_min not supported for dtype r9  r
   )r8  r  r
  rH   rI   r=   r^   r%   r&   r:  r	   r;  ZMINrP   r@  r8   r6   r0   r   r'   r   r   r   r>  r?  r   r   ry   r=  rA  rB  r   r   r   
atomic_min  sV          rG  c              
   C  sj   t | ||d|\} }}t|}t|}|jj}| r>tjjntjj	}t
||| j|j|j|||jS )NrT   )r8  r  r
  rH   rI   rC   r	   r;  ZFADDZADDr%   r&   r:  rP   )r  r.  r  r  r	  r!   rC  r7  r   r   r   
atomic_add  s    rH  c              
   C  sN   t | ||d|\} }}t|}t|}t|tjj| j	|j	|j	|||j
S )Nand)r8  r  r
  r%   r&   r:  r	   r;  ANDrP   rH   r  r.  r  r  r	  r!   r   r   r   
atomic_and  s     rL  c              
   C  sN   t | ||d|\} }}t|}t|}t|tjj| j	|j	|j	|||j
S )Nor)r8  r  r
  r%   r&   r:  r	   r;  ORrP   rH   rK  r   r   r   	atomic_or   s     rO  c              
   C  sN   t | ||d|\} }}t|}t|}t|tjj| j	|j	|j	|||j
S )Nxor)r8  r  r
  r%   r&   r:  r	   r;  ZXORrP   rH   rK  r   r   r   
atomic_xor  s     rQ  c              
   C  sN   t | ||d|\} }}t|}t|}t|tjj| j	|j	|j	|||j
S )NZxchg)r8  r  r
  r%   r&   r:  r	   r;  ZXCHGrP   rH   rK  r   r   r   atomic_xchg  s     rR  c                 C  sH   |   |jjks(td|jj d|  |  } | dkr<d} ttj| S )Nzinput_precision must be one of z. Got ZTF32X3ZTF32x3)lowerr   Zallowed_dot_input_precisionsr   upperr   r	   ZINPUT_PRECISION)input_precisionr!   r   r   r   _str_to_dot_input_precision  s    rV  )rE   rF   accrU  max_num_imprecise_acc	out_dtyper!   r"   c              
   C  s  dd }| j  r|j  s t|| j|j|j | j sF|j rbt| tj|} t|tj|}|d krr|jj	}t
||}t| j}t|j}	||	  krdksn ||	  krdksn td| j d|j d| jd j|jd	 jks&td
| j d|j d| jd j d|jd	 j d	| jd	 jdkr\| jd jdkr\|jd jdksvtd| j d|j d| j j r| j jtjkstd| jd jdkstd|d}
tj}nd| rtdnP| j j s| j j r
|d}
tj}n"| r|dn|d}
|}| j jd	 }|j jd }|dkrZ| j jd nd }t||rt|||gn||g}|d kr||
|r|||gn||g}n|j}|j |kst|d kr| j r|j r|jj}nd}t| | j|j||||S )Nc                 S  s  |j sT|  s| rtd|  r2| r2d S | |ksPtd|  d| dn(|  sd| r| |kstd|  d| d|  s|  std|  dn|  s| r|jrd	d
dg}nd	d
g}dd }|| |d |||d n|  s&| 	 s&| 
 s&|  s&td|  | s\|	 s\|
 s\| s\td| | |ks|td|  d| dd S )Nz1Dot op does not support fp8e4nv on CUDA arch < 90zFirst input (z) and second input (z) must have the same dtype!z0Both operands must be same type. First operand (z) and second operand (r   z:Both operands must be either int8 or uint8. Operand type (Zfp8e4nvZfp8e5Zfp8e4b15c                   s@   t  fdd|D s<d|}td| d| d  dd S )Nc                 3  s    | ]}t  d |  V  qdS )is_N)r   )r   Z
dtype_namer/   r   r   r   =  s     zLdot.<locals>.assert_dtypes_valid.<locals>._validate_dtype.<locals>.<genexpr>r   zOnly supports z. z (r   )anyr   r   )r/   allowed_typesZoperand_nameZsupported_typesr   r[  r   _validate_dtype<  s    
z9dot.<locals>.assert_dtypes_valid.<locals>._validate_dtypezFirst operandzSecond operandzUnsupported dtype )r   r   r   r   r=   Zis_int8Zis_uint8Zallow_fp8e4b15r9   r;   r7   r~   )Z	lhs_dtypeZ	rhs_dtyper   r]  r^  r   r   r   assert_dtypes_valid*  s4    "
**z dot.<locals>.assert_dtypes_validr      z+Both inputs must be either 2D or 3D; (lhs: z	 vs rhs: r   r   zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (r5  z0All non-batch values in both first input shape (z) and second input shape (z) must be >= 16!zonly int8 supported!r
   r   zsmall blocks not supported!r   zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`)!rH   r   r   r/   r   r   rJ   r%   r:   Zdefault_dot_input_precisionrV  r   r   r   rI   r=   r  	get_int32r'   r;   r$   r7   Zget_fp32r8   r9   Zget_fp16r   r   rP   r   Zmax_num_imprecise_acc_defaultr&   Z
create_dot)rE   rF   rW  rU  rX  rY  r!   r_  Zlhs_rankZrhs_rankr   Zret_scalar_tyMNBr_   Z
acc_handler   r   r   dot'  sf     


F0$


 
"

rf  )	conditionrg   rh   r!   r"   c                 C  s   t | tj|} | j rHt| ||\} }t|||\}}t| ||\} }t|||dd\}}| j svt| ||\} }|j}t|| j	|j	|j	|S )NT)
rJ   r%   r}   rH   r   rG   rL   r&   Zcreate_selectrP   )rg  rg   rh   r!   r   r_   r   r   r   rA    s    

rA  c                 C  s"   |rt ||}n|}t | |S rA   )r%   r   r&   )rg   rW   r   Zres_tyr   r   r   wrap_tensor  s    rh  zSequence[tl.tensor]zTuple[tl.tensor, ...])inputsr    r!   r"   c                   s    d kr"t fddD d d jjt} |k sNtd| d fddtD tfddD std	d
d D  |   t fddt	tD S )Nc                 3  s$   | ]}t ||jjgd  dV  qdS )Tr   N)r   r   r   r   tr!   r   r   r     s     zreduction.<locals>.<genexpr>r   z&reduction axis must be < inputs rank (r   c                   s   g | ]\}}| kr|qS r   r   )r   r   r   )r    r   r   r     s      zreduction.<locals>.<listcomp>c                 3  s   | ]}|j j kV  qd S rA   )rH   r   rj  r   r   r   r     s     z-all reduction inputs must have the same shapec                 S  s   g | ]
}|j qS r   rP   rj  r   r   r   r     s     c                 3  s(   | ] }t | | jjV  qd S rA   rh  Z
get_resultrH   rI   r   r   )ri  	reduce_opr   r   r   r     s     )
tuplerH   r   r   r   r   allZcreate_reduceverifyr   )ri  r    region_builder_fnr!   rankr   )r    r!   ri  rp  r   r   r   	reduction  s    rv  )ri  r    reverser!   r"   c                   s    d j jt}| |  kr*|k sBn td| d| d|dk rR||7 } D ]}|j jksVtdqV|dd  D |||   t fdd	tt D S )
Nr   z
scan axis z must be < inputs rank (r   z(all scan inputs must have the same shapec                 S  s   g | ]
}|j qS r   rm  rj  r   r   r   r     s     z$associative_scan.<locals>.<listcomp>c                 3  s(   | ] }t | | jjV  qd S rA   rn  ro  ri  Zscan_opr   r   r   r     s     z#associative_scan.<locals>.<genexpr>)rH   r   r   r   Zcreate_scanrs  rq  r   )ri  r    rt  rw  r!   ru  rk  r   rx  r   associative_scan  s    .ry  )rM   num_binsr!   r"   c                 C  sJ   t | jdkstd| j s(tdt|| j|t	tj
|fS )Nr
   z histogram only supports 1D inputz%histogram only supports integer input)r   r   r   r/   r=   r%   r&   Zcreate_histogramrP   r   r'   )rM   rz  r!   r   r   r   	histogram  s    r{  )rg   valuesr"   c                 C  s@   t dt| jt|kr td| jdt|| j  | S )Nr
   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	rt   r   r   r$   rP   set_attrr	   	make_attrget_contextrg   r|  r   r   r   multiple_of  s    r  c                 C  s:   t | jt |krtd| jdt|| j  | S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr   r   r$   rP   r}  r	   r~  r  r  r   r   r   max_contiguous  s    r  c                 C  s:   t | jt |krtd| jdt|| j  | S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  r   r   r   max_constancy  s    r  )r!   r"   c                 C  s   t |  t jS rA   )r%   r&   Zcreate_barrierr,  rl  r   r   r   debug_barrier  s    r  zList[tl.tensor])prefixargshexr!   r"   c                 C  sx   |  ds|r| d7 } |  ds4|r4| d d d } t| dkrR| dsRd|  } dd |D }t|| ||tjS )N r   r   r   c                 S  s   g | ]
}|j qS r   rm  )r   argr   r   r   r     s     z device_print.<locals>.<listcomp>)endswithr   
startswithr%   r&   Zcreate_printr,  )r  r  r  r!   new_argsr   r   r   device_print  s    r  )condmsg	file_namelinenor!   r"   c              	   C  sP   | j }| s2t|jd}t|| jd|} t|| j||||tj	S )N)r
   )
rH   r   r%   r   rI   r&   r   rP   Zcreate_assertr,  )r  r  r  	func_namer  r!   Zcond_tyr   r   r   device_assert  s
    r  c                 C  s4  t |trt|}t |tjr|rZd|j  kr:dk sNn td|j d| |jS d|j  krpdk sn td|j d| |jS nt |tjr|j	jdkstd	|j
 std
|j
tjkr|r| |j|  |j
 S |j
tjkr|sdstd|jS ds0tdt| d S )Nl         l            z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the range           zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r
   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )r   r   r%   r   r   r   r   rb  r&   r   r/   r=   r   r   rP   Zget_int64_tyr^   r'   rH   )r!   r  r(  r   r   r   _convert_elem_to_ir_value  s"    

**r  c                   s,   t |dr fdd|D S t |gS )Nr  c                   s   g | ]}t  |qS r   )r  r  r!   r(  r   r   r   %  s     z)_convert_to_ir_values.<locals>.<listcomp>)r  r  )r!   	list_liker(  r   r  r   r)  #  s    
r)  )baser!   r"   c              	     s:  t ||}t ||}t ||dd}| j r8| jj r@td| jjtjkrht| t	tj
| jj|} t dsx g dd  D  tdd  D std	t|ds|g}d
d |D }t|ttt|kstdt fdd||||fD std|| j||| |}t|t	t| jj S )NFr'  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c                 S  s"   g | ]}t |tjr|jn|qS r   r  r  r   r   r   r   ;  s     z"make_block_ptr.<locals>.<listcomp>c                 s  s0   | ](}t |to&d |  ko"dk n  V  qdS )r  r  N)r   r   r  r   r   r   r   <  s     z!make_block_ptr.<locals>.<genexpr>zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                 S  s"   g | ]}t |tjr|jn|qS r   r  r  r   r   r   r   B  s     z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc                 3  s   | ]}t  t |kV  qd S rA   )r   )r   r  r  r   r   r   F  s     zBExpected shape/strides/offsets/block_shape to have the same length)r)  rH   rB   r  r   r$   r%   r}   rJ   r   r  r!  r  rr  r   r   r   r   r   Zcreate_make_block_ptrrP   r&   r   )r  r   stridesr*  r  orderr!   rP   r   r  r   make_block_ptr)  s,    



 "r  c                 C  s&   t ||dd}t|| j|| jS r&  )r)  r%   r&   Zcreate_advancerP   rH   )r  r*  r!   r   r   r   advanceP  s    r  )FFTF)N)T)l
__future__r   typingr   r   r   r   r   Z_C.libtritonr	    r   r%   r   r   	Exceptionr   r)   r*   r1   r>   rD   rL   rT   rX   rZ   r]   r`   rc   rf   rq   rr   ru   rw   ry   rz   r{   r   r   r   r   r   r   r   rU   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rG   r   r   rJ   r   r   r   r  r  r
  r  r  r#  r$  r+  r-  r/  r0  r3  r6  r8  rF  rG  rH  rL  rO  rQ  rR  rV  rf  rA  rh  rv  ry  r{  r  r  r  r  r  r  r  r)  r  r  r   r   r   r   <module>   s   (    



:
w: 	,	''		`		'