U
    ?h                     @   sT   d Z ddlZddlmZmZ ddlmZ edG dd deZedkrPe	  dS )	a  
Matrix multiplication example via `cuda.jit`.

Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella

Contents in this file are referenced from the sphinx-generated docs.
"magictoken" is used for markers as beginning and ending of example text.
    N)CUDATestCaseskip_on_cudasim)captured_stdoutz4cudasim doesn't support cuda import at non-top-levelc                       s4   e Zd ZdZ fddZ fddZdd Z  ZS )
TestMatMulzo
    Text matrix multiplication using simple, shared memory/square, and shared
    memory/nonsquare cases.
    c                    s    t  | _| j  t   d S N)r   _captured_stdout	__enter__supersetUpself	__class__ [/var/www/html/venv/lib/python3.8/site-packages/numba/cuda/tests/doc_examples/test_matmul.pyr
      s    
zTestMatMul.setUpc                    s   | j d d d  t   d S r   )r   __exit__r	   tearDownr   r   r   r   r      s    zTestMatMul.tearDownc                    s  ddl mm ddl}ddl}jfdd}|dddg}|ddg}|	ddg}
|}
|}
|}	d}
||jd |
d  }||jd	 |
d	  }||f}|||
f |||	 |	 }t| t||  d j fd
d}|dddg}|ddg}|	ddg}
|}
|}
|}	  f}
||jd |
d  }||jd	 |
d	  }||f}|||
f |||	 |	 }t| t||  d}| j|||| k|d |dddg}|ddg}|	ddg}
|}
|}
|}	  f}
t|jd |jd }t|jd	 |jd	 }|||
d  }|||
d	  }||f}|||
f |||	 |	 }t| t||  d}| j|||| k|d dS )z/Test of matrix multiplication on various cases.r   )cudafloat32Nc                    sn     d\}}||jd k rj||jd k rjd}t| jd D ] }|| ||f |||f  7 }q<||||f< dS )z2Perform square matrix multiplication of C = A * B.   r              N)gridshaperange)ABCijtmpk)r   r   r   matmul)   s    z)TestMatMul.test_ex_matmul.<locals>.matmul      )r#   r#   r   c                    sh  j j  fd}j j  fd}d\}}jj}jj}jj}	d}
t|	D ]}d|||f< d|||f< || jd k r||   | jd k r| |||   f |||f< ||jd k r||   |jd k r|||   |f |||f< 	  t D ]"}|
|||f |||f  7 }
q
	  q^||jd k rd||jd k rd|
|||f< dS )z
            Perform matrix multiplication of C = A * B using CUDA shared memory.

            Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella
            )r   Zdtyper   r   r   r   N)
Zsharedarrayr   Z	threadIdxxyZgridDimr   r   Zsyncthreads)r   r   r   ZsAZsBr&   r'   ZtxtyZbpgr    r   r   ZTPBr   r   r   r   fast_matmulN   s(    	$$ 
 z.TestMatMul.test_ex_matmul.<locals>.fast_matmulz5fast_matmul incorrect for shared memory, square case.)msgs            z9fast_matmul incorrect for shared memory, non-square case.)Znumbar   r   numpymathZjitZarangeZreshapeZonesZzerosZ	to_deviceceilr   Zcopy_to_hostprintZ
assertTrueallmax)r   npr1   r"   Zx_hZy_hZz_hZx_dZy_dZz_dZthreadsperblockZblockspergrid_xZblockspergrid_yZblockspergridr*   r+   Z
grid_y_maxZ
grid_x_maxr   r)   r   test_ex_matmul    sp    


+





zTestMatMul.test_ex_matmul)__name__
__module____qualname____doc__r
   r   r7   __classcell__r   r   r   r   r      s   r   __main__)
r;   ZunittestZnumba.cuda.testingr   r   Znumba.tests.supportr   r   r8   mainr   r   r   r   <module>   s    