U
    ?hR#  ã                   @   sà   d dl Zd dlmZmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZ dd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ Zed!ƒG d"d#„ d#e
ƒƒZed$krÜe	 ¡  dS )%é    N)ÚcudaÚint32Úint64Úfloat32Úfloat64)ÚunittestÚCUDATestCaseÚskip_on_cudasim)Úconfigc                 C   s4   t  d¡}|dkrd| d< t  d¡ | d | |< d S )Né   r   é*   ì   ÿÿ )r   ÚgridZsyncwarp)ÚaryÚi© r   úW/var/www/html/venv/lib/python3.8/site-packages/numba/cuda/tests/cudapy/test_warp_ops.pyÚuseful_syncwarp   s
    

r   c                 C   s$   t  d¡}t  d||¡}|| |< d S ©Nr   r   ©r   r   Z	shfl_sync)r   Úidxr   Úvalr   r   r   Úuse_shfl_sync_idx   s    
r   c                 C   s$   t  d¡}t  d||¡}|| |< d S r   )r   r   Zshfl_up_sync©r   Údeltar   r   r   r   r   Úuse_shfl_sync_up   s    
r   c                 C   s$   t  d¡}t  d||¡}|| |< d S r   )r   r   Zshfl_down_syncr   r   r   r   Úuse_shfl_sync_down   s    
r   c                 C   s$   t  d¡}t  d||¡}|| |< d S r   )r   r   Zshfl_xor_sync)r   Úxorr   r   r   r   r   Úuse_shfl_sync_xor!   s    
r   c                 C   s$   t  d¡}t  d|d¡}|| |< d S ©Nr   r   r   r   )r   Zintor   r   r   r   r   Úuse_shfl_sync_with_val'   s    
r    c                 C   s&   t  d¡}t  d| | ¡}|||< d S r   )r   r   Zall_sync©Úary_inÚary_outr   Úpredr   r   r   Úuse_vote_sync_all-   s    
r%   c                 C   s&   t  d¡}t  d| | ¡}|||< d S r   )r   r   Zany_syncr!   r   r   r   Úuse_vote_sync_any3   s    
r&   c                 C   s&   t  d¡}t  d| | ¡}|||< d S r   )r   r   Zeq_syncr!   r   r   r   Úuse_vote_sync_eq9   s    
r'   c                 C   s    t jj}t  dd¡}|| |< d S )Nr   T©r   Z	threadIdxÚxZballot_sync)r   r   Úballotr   r   r   Úuse_vote_sync_ballot?   s    r+   c                 C   s&   t  d¡}t  d| | ¡}|||< d S r   )r   r   Zmatch_any_sync)r"   r#   r   r*   r   r   r   Úuse_match_any_syncE   s    
r,   c                 C   s2   t  d¡}t  d| | ¡\}}|r&|nd||< d S r   )r   r   Zmatch_all_sync)r"   r#   r   r*   r$   r   r   r   Úuse_match_all_syncK   s    
r-   c                 C   sz   t jj}|d dkr"t  dd¡}nL|d dkr<t  dd¡}n2|d dkrVt  dd¡}n|d d	krnt  d
d¡}|| |< d S )Né   r   éTr   é""""é   éDDDDé   ì   ˆ r(   )Úarrr   r*   r   r   r   Úuse_independent_schedulingQ   s    r6   c                 C   s   t jr
dS t ¡ j| kS d S )NT)r
   ZENABLE_CUDASIMr   Zget_current_deviceZcompute_capability)Úccr   r   r   Ú_safe_cc_check^   s    r8   z2Warp Operations are not yet implemented on cudasimc                   @   s´   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Ze edƒd¡dd„ ƒZe edƒd¡dd„ ƒZe edƒd¡dd„ ƒZdd„ Zd d!„ Zd"S )#ÚTestCudaWarpOperationsc                 C   sJ   t  d¡tƒ}d}tj|tjd}|d|f |ƒ |  t |dk¡¡ d S )Nzvoid(int32[:])é    ©Údtyper   r   )r   Újitr   ÚnpÚemptyr   Ú
assertTrueÚall©ÚselfÚcompiledÚnelemr   r   r   r   Útest_useful_syncwarpg   s
    z+TestCudaWarpOperations.test_useful_syncwarpc                 C   sP   t  d¡tƒ}d}d}tj|tjd}|d|f ||ƒ |  t ||k¡¡ d S ©Núvoid(int32[:], int32)r:   r.   r;   r   )r   r=   r   r>   r?   r   r@   rA   )rC   rD   rE   r   r   r   r   r   Útest_shfl_sync_idxn   s    z)TestCudaWarpOperations.test_shfl_sync_idxc                 C   st   t  d¡tƒ}d}d}tj|tjd}tj|tjd}||d …  |8  < |d|f ||ƒ |  t ||k¡¡ d S rG   )	r   r=   r   r>   r?   r   Úaranger@   rA   ©rC   rD   rE   r   r   Úexpr   r   r   Útest_shfl_sync_upv   s    z(TestCudaWarpOperations.test_shfl_sync_upc                 C   sv   t  d¡tƒ}d}d}tj|tjd}tj|tjd}|d | …  |7  < |d|f ||ƒ |  t ||k¡¡ d S rG   )	r   r=   r   r>   r?   r   rJ   r@   rA   rK   r   r   r   Útest_shfl_sync_down€   s    z*TestCudaWarpOperations.test_shfl_sync_downc                 C   sd   t  d¡tƒ}d}d}tj|tjd}tj|tjd|A }|d|f ||ƒ |  t ||k¡¡ d S )NrH   r:   é   r;   r   )	r   r=   r   r>   r?   r   rJ   r@   rA   )rC   rD   rE   r   r   rL   r   r   r   Útest_shfl_sync_xorŠ   s    z)TestCudaWarpOperations.test_shfl_sync_xorc                 C   s    t tttf}t  d¡t d¡t tj¡t tj¡f}t||ƒD ]\\}}t |d d … |f¡t	ƒ}d}tj
||jd}|d|f ||ƒ |  t ||k¡¡ q>d S )Néÿÿÿÿl        r:   r;   r   )r   r   r   r   r>   ÚpiÚzipr   r=   r    r?   r<   r@   rA   )rC   ÚtypesÚvaluesÚtypr   rD   rE   r   r   r   r   Útest_shfl_sync_types“   s    
 
ÿz+TestCudaWarpOperations.test_shfl_sync_typesc                 C   sŠ   t  d¡tƒ}d}tj|tjd}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S )Núvoid(int32[:], int32[:])r:   r;   r   r   rQ   )	r   r=   r%   r>   Zonesr   r?   r@   rA   ©rC   rD   rE   r"   r#   r   r   r   Útest_vote_sync_allž   s    z)TestCudaWarpOperations.test_vote_sync_allc                 C   s’   t  d¡tƒ}d}tj|tjd}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< d|d< |d|f ||ƒ |  t |dk¡¡ d S )NrX   r:   r;   r   r   r1   é   )	r   r=   r&   r>   Úzerosr   r?   r@   rA   rY   r   r   r   Útest_vote_sync_any©   s    z)TestCudaWarpOperations.test_vote_sync_anyc                 C   s¼   t  d¡tƒ}d}tj|tjd}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d|d d …< |d|f ||ƒ |  t |dk¡¡ d S )NrX   r:   r;   r   r   )	r   r=   r'   r>   r\   r   r?   r@   rA   rY   r   r   r   Útest_vote_sync_eqµ   s    z(TestCudaWarpOperations.test_vote_sync_eqc                 C   sP   t  d¡tƒ}d}tj|tjd}|d|f |ƒ |  t |t d¡k¡¡ d S )Núvoid(uint32[:])r:   r;   r   r   )r   r=   r+   r>   r?   Úuint32r@   rA   rB   r   r   r   Útest_vote_sync_ballotÃ   s
    z,TestCudaWarpOperations.test_vote_sync_ballot)é   r   z-Matching requires at least Volta Architecturec                 C   sl   t  d¡tƒ}d}tj|tjdd }tj|tjd}t dd¡}|d|f ||ƒ |  t 	||k¡¡ d S )NrX   é
   r;   r1   )iU  iª  r[   r   )
r   r=   r,   r>   rJ   r   r?   Útiler@   rA   )rC   rD   rE   r"   r#   rL   r   r   r   Útest_match_any_syncÊ   s    z*TestCudaWarpOperations.test_match_any_syncc                 C   sŠ   t  d¡tƒ}d}tj|tjd}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S )NrX   rc   r;   r   iÿ  r.   r   )	r   r=   r-   r>   r\   r   r?   r@   rA   rY   r   r   r   Útest_match_all_syncÕ   s    z*TestCudaWarpOperations.test_match_all_syncz;Independent scheduling requires at least Volta Architecturec                 C   sN   t  d¡tƒ}tjdtjd}t dd¡}|d |ƒ |  t ||k¡¡ d S )Nr_   r:   r;   )r/   r0   r2   r4   é   ©r   r:   )	r   r=   r6   r>   r?   r`   rd   r@   rA   )rC   rD   r5   rL   r   r   r   Útest_independent_schedulingâ   s
    z2TestCudaWarpOperations.test_independent_schedulingc                 C   sH   t jdd„ ƒ}tjdtjd}|d |ƒ t dd¡}tj ||¡ d S )Nc                 S   s4   t  d¡}|d dkr$t  ¡ | |< nt  ¡ | |< d S )Nr   r1   r   )r   r   Z
activemask©r)   r   r   r   r   Úuse_activemaskí   s    
z>TestCudaWarpOperations.test_activemask.<locals>.use_activemaskr:   r;   rh   )iUUUUl   ª*UU rO   )r   r=   r>   r\   r`   rd   ÚtestingÚassert_equal)rC   rk   ÚoutÚexpectedr   r   r   Útest_activemaskì   s    
z&TestCudaWarpOperations.test_activemaskc                 C   sZ   t jdd„ ƒ}tjdtjd}|d |ƒ tjdd„ tdƒD ƒtjd}tj ||¡ d S )Nc                 S   s   t  d¡}t  ¡ | |< d S )Nr   )r   r   Zlanemask_ltrj   r   r   r   Úuse_lanemask_lt  s    
z@TestCudaWarpOperations.test_lanemask_lt.<locals>.use_lanemask_ltr:   r;   rh   c                 S   s   g | ]}d | d ‘qS )r1   r   r   )Ú.0r   r   r   r   Ú
<listcomp>  s     z;TestCudaWarpOperations.test_lanemask_lt.<locals>.<listcomp>)	r   r=   r>   r\   r`   ZasarrayÚrangerl   rm   )rC   rq   rn   ro   r   r   r   Útest_lanemask_lt  s    
ÿz'TestCudaWarpOperations.test_lanemask_ltN)Ú__name__Ú
__module__Ú__qualname__rF   rI   rM   rN   rP   rW   rZ   r]   r^   ra   r   Z
skipUnlessr8   re   rf   ri   rp   ru   r   r   r   r   r9   e   s0   

	
ÿ
	
ÿ

ÿ
r9   Ú__main__)Únumpyr>   Znumbar   r   r   r   r   Znumba.cuda.testingr   r   r	   Z
numba.corer
   r   r   r   r   r   r    r%   r&   r'   r+   r,   r-   r6   r8   r9   rv   Úmainr   r   r   r   Ú<module>   s,    .