U
    hL3                     @  s  d dl mZ ddlmZ ddlmZ ddlmZ dd Zd	d
ddZd	d
ddZ	ej
edd Zej
eeddd Zej
eedd_ddZej
edd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zej
eejd&d'd(d)d`d,d-Zej
eejd.d/d0dad1d2Zed3d4 Zed5d6 Zed7d8 Zed9d: Zej
eejd;d'd(d)dbd<d=Zej
eejd>d/d0dcd?d@ZedAdB Z ej
eedCdddDdCZ!edEdF Z"ej
ej#edGdedHdIZ$ej
ee%dJdfdKdJZ&edLdM Z'ej
ee%dNdgdOdNZ(ed	d	dPdQdRZ)ed	d	d	dSdTdUZ*ej
ed*ej+fd	d	dVdWdXZ,dYdZ Z-ej
edhd[d\Z.ed]d^ Z/d*S )i    )annotations   )jit   )core)mathc                 C  s   t | tjr| jS | S N)
isinstancer   	constexprvalue)o r   J/var/www/html/venv/lib/python3.8/site-packages/triton/language/standard.py_unwrap_if_constexpr
   s    r   zcore.constexpr)ic                 C  s.   d}| j }|dkr$|dL }|d7 }q
t|S )Nr   r   r   r   r
   )r   log2nr   r   r   _log2   s    
r   c                 C  s$   | j }t||d @ dko |dkS )Nr   r   r   )r   r   r   r   r   _is_power_of_two   s    r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :param div: Block
    r   r   )xdivr   r   r   cdiv!   s    r   sigmoidc                 C  s   ddt |    S )Nr   )r   expr   r   r   r   r   /   s    softmaxFc                 C  s0   | t | d }t|}t|d}t|||S )Nr   )maxr   r   sumZfdiv)r   Zieee_roundingznumZdenr   r   r   r   6   s    

c                 C  s   t j| | jgddS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    T)Zcan_reorder)r   reshapenumelr   r   r   r   ravel@   s    	r#   c                 C  sT   | | | }|| }|| }|| }t || |}|||  }	|| | }
|	|
fS )a  
    Transforms indices of a row-major :code:`size_i * size_j` matrix into those
    of one where the indices are col-major for each group of :code:`size_g`
    rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jZsize_iZsize_jZsize_gZijZsize_gjZgroup_idZoff_iZnew_iZnew_jr   r   r   	swizzle2dL   s    r'   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zeross   s    
r+   c                 C  s   t | j| jS )zS
    Creates a tensor of zeros with the same shape and type as a given tensor.
    )r+   r)   r*   )inputr   r   r   
zeros_like   s    r-   c           	      C  sJ   |r| |ko||k }nd}| |kp$|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtZv_retZi_retr   r   r   _argmax_combine   s    r8   c                 C  s   t | |||dS NTr8   r1   r2   r3   r4   r   r   r   _argmax_combine_tie_break_left   s    r<   c                 C  s   t | |||dS r.   r:   r;   r   r   r   _argmax_combine_tie_break_fast   s    r=   c                 C  s   t | |S r   )r   maximumabr   r   r   _elementwise_max   s    rB   r>   return_indicesreturn_indices_tie_break_left)Zreturn_indices_argtie_break_argNTc                 C  s   t | } |r8|r$t j| |t|dS t j| |t|dS nft | jjt dk rt | j rn| 	t j
} n| j std| 	t j} t j| |t|dS d S N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesr<   r=   r
   r*   primitive_bitwidthis_floatingtofloat32is_intAssertionErrorint32reducerB   r,   axisrC   rD   rH   r   r   r   r      s    
r   zmaximum indexr5   )rE   c                 C  s   t | |d||d\}}|S NT)rC   rD   rH   )r   r,   rU   r5   rH   _retr   r   r   argmax   s    rZ   c           	      C  sJ   |r| |ko||k }nd}| |k p$|}t || |}t |||}||fS r.   r/   )	r1   r2   r3   r4   r5   r6   ltZ	value_retZ	index_retr   r   r   _argmin_combine   s    r\   c                 C  s   t | |||dS r9   r\   r;   r   r   r   _argmin_combine_tie_break_left   s    r^   c                 C  s   t | |||dS r.   r]   r;   r   r   r   _argmin_combine_tie_break_fast   s    r_   c                 C  s   t | |S r   r$   r?   r   r   r   _elementwise_min   s    r`   r%   c                 C  s   t | } |r8|r$t j| |t|dS t j| |t|dS n`t | jjdk rt | j rh| 	t j
} n| j sztd| 	t j} t j| |t|dS d S rF   )r   rJ   rK   r^   r_   r
   r*   rL   rM   rN   rO   rP   rQ   rR   rS   r`   rT   r   r   r   min   s    
ra   zminimum indexc                 C  s   t | |d||d\}}|S rV   )ra   rW   r   r   r   argmin   s    rb   c                 C  s   | | S r   r   r?   r   r   r   _sum_combine   s    rc   r   c                 C  s   t | } t j| |t|dS )NrG   )r   rJ   rS   rc   )r,   rU   rH   r   r   r   r     s    
c                 C  s   | |A S r   r   r?   r   r   r   _xor_combine  s    rd   zxor sumc                 C  s<   | j j}| stdtj| |d} tj| |t|||dS )Nz#xor_sum only supported for integers)_builder)rH   re   
_generator)typeZscalarrP   
ValueErrorr   rJ   rS   rd   )r,   rU   rH   re   rf   Z	scalar_tyr   r   r   xor_sum  s
    ri   cumsumc                 C  s   t | } t | |t|S r   )r   rJ   associative_scanrc   r,   rU   reverser   r   r   rj   %  s    
c                 C  s   | | S r   r   r?   r   r   r   _prod_combine1  s    rn   cumprodc                 C  s   t | } t | |t|S r   )r   rJ   rk   rn   rl   r   r   r   ro   6  s    
)r   n_dimsc                 C  s,  | j |? }|d|  dd|| d  g}t| |}tddd d d d f }tt|d|  dd d d d d f |}tt|| dd d d d d f |}	t|| j}t|	| j}	tj| jj	dd}
|j
|
dd}|	j
|
dd}| j
|
dd}|t||	k|A ||A t|A }|j
| jddS )Nr   r   r   T)Zbitwidthsigned)Zbitcast)r"   r   r!   arangebroadcast_tor   r)   Zget_int_dtyper*   rL   rN   r0   r-   )r   flipr   rp   n_outerr)   ymaskleftrightZidtypeZileftZirightZixrY   r   r   r   _compare_and_swapB  s    
,("rz   )stageorderrp   c                 C  s   | j |? }t||k |dkrl|d|d |   dd| g}tttddddddf || j}n|}t|D ]}t| ||||  |} qz| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r"   r   static_assertr!   rs   rr   r)   static_rangerz   )r   r{   r|   rp   ru   r)   rt   r   r   r   r   _bitonic_mergeV  s    
.r   )dim
descendingc                 C  sv   |d krt | jd n|}t|t | jd kd t| j| }td|d D ]}t| |||k rhdn||} qR| S )Nr   z+only minor dimension is currently supportedr   )lenr)   r   r}   r   r~   r   )r   r   r   Z_dimrp   r   r   r   r   sorto  s    r   c                 C  sF   t | } t |}| d kr$t|d } | t|d ks<tdt| S )Nr   z2Currently only support flipping the last dimension)r   r   rQ   r   r
   )r   r)   r   r   r   _get_flip_dim  s    r   c           	      C  s  t t| jt|| j  t t| j t| j}t| jt| jt|| j  }t | dg| }t ||}t 	dddddf dt 	dd k}t 
||D ]T}|}t 
d|d D ]$}||kr||d krt ||}qt|| |d dd}qt || j} | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    r   r   Nr   TrG   )r   r}   r   r)   r   r"   r   r!   Zexpand_dimsrr   r~   r   )	r   r   Zstepsstartrv   rt   r   Zflip2r&   r   r   r   rt     s    
 (rt   c                 C  sX   t | |}t|jtstt|jdkr.|S t ||jdd d|jd  g S dS )z
    Interleaves the values of two tensors along their last dimension.

    The two tensors must have the same shape.

    Equivalent to `tl.join(a, b).reshape(a.shape[-1:] + [2 * a.shape[-1]])`
    r   Nr   )r   joinr	   r)   listrQ   r   r!   )r@   rA   cr   r   r   
interleave  s
    	r   )F)NFTF)TF)NFTF)TF)NF)NFNN)r   F)r   F)N)0
__future__r   Zruntime.jitr    r   r   r   r   r   Z_tensor_member_fnr   Z_add_math_1arg_docstrr   r   r#   r'   r+   r-   r8   r<   r=   rB   Z_add_reduction_docstrr   rZ   r\   r^   r_   r`   ra   rb   rc   r   rd   builtinri   Z_add_scan_docstrrj   rn   ro   rz   r   ZCONSTEXPR_0r   r   rt   r   r   r   r   r   <module>   s   	


&












	
		