U
    hd                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"d%d& Z#d'd( Z$d)d* Z%d+d, Z&d-d. Z'd/d0 Z(d1d2 Z)d3d4 Z*d5d6 Z+d7d8 Z,d9d: Z-d;d< Z.d=d> Z/d?d@ Z0dAdB Z1ej2dCdDdEdF Z3ej2dCdDdGdH Z4dIdJ Z5dKdL Z6dMdN Z7dOdP Z8dQdR Z9dSdT Z:dUdV Z;dWdX Z<dYdZ Z=d[d\ Z>d]d^ Z?d_d` Z@dadb ZAdcdd ZBdedf ZCdgdh ZDdidj ZEdkdl ZFdmdn ZGdodp ZHdqdr ZIdsdt ZJdudv ZKdwdx ZLdydz ZMd{d| ZNd}d~ ZOdd ZPdd ZQdd ZRdd ZSdd ZTdd ZUdd ZVG dd deZWeXdkreY  dS )    N)cudaint64)compile_ptx)TypingError)f2)unittestCUDATestCaseskip_on_cudasimskip_unless_cc_53skip_unless_cuda_pythonc                 C   s   t jj}|| d< d S Nr   r   	threadIdxxaryi r   `/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/numba/cuda/tests/cudapy/test_intrinsics.pysimple_threadidx   s    r   c                 C   s   t jj}|| |< d S Nr   r   r   r   r   fill_threadidx   s    r   c                 C   s>   t jj}t jj}t jj}|d |d  |d  | |||f< d S N   )r   r   r   yz)r   r   jkr   r   r   fill3d_threadidx   s    r   c                 C   s   t d}|| |< d S r   r   gridr   r   r   r   simple_grid1d   s    
r!   c                 C   s"   t d\}}|| | ||f< d S N   r   )r   r   r   r   r   r   simple_grid2d$   s    r$   c                 C   s(   t d}t d}|dkr$|| d< d S )Nr   r   r   r    gridsize)r   r   r   r   r   r   simple_gridsize1d)   s    

r'   c                 C   s@   t d\}}t d\}}|dkr<|dkr<|| d< || d< d S )Nr#   r   r   r%   )r   r   r   r   r   r   r   r   simple_gridsize2d0   s
    r(   c           	      C   sp   t d\}}t jjt jj }t jjt jj }| j\}}t|||D ]&}t|||D ]}|| | ||f< qTqDd S r"   )r   r    gridDimr   blockDimr   shaperange)	cstartXstartYgridXgridYheightwidthr   r   r   r   r   intrinsic_forloop_step8   s    
r4   c                 C   s   t || d< d S r   )r   Zpopcr   r-   r   r   r   simple_popcC   s    r6   c                 C   s   t |||| d< d S r   )r   fmar   abr-   r   r   r   
simple_fmaG   s    r;   c                 C   s   t j|d |d | d< d S r   r   fp16Zhaddr   r9   r:   r   r   r   simple_haddK   s    r?   c                 C   s   t j||| d< d S r   r<   r>   r   r   r   simple_hadd_scalarO   s    r@   c                 C   s$   t j|d |d |d | d< d S r   r   r=   Zhfmar8   r   r   r   simple_hfmaS   s    rB   c                 C   s   t j|||| d< d S r   rA   r8   r   r   r   simple_hfma_scalarW   s    rC   c                 C   s   t j|d |d | d< d S r   r   r=   Zhsubr>   r   r   r   simple_hsub[   s    rE   c                 C   s   t j||| d< d S r   rD   r>   r   r   r   simple_hsub_scalar_   s    rF   c                 C   s   t j|d |d | d< d S r   r   r=   Zhmulr>   r   r   r   simple_hmulc   s    rH   c                 C   s   t j||| d< d S r   rG   r>   r   r   r   simple_hmul_scalarg   s    rI   c                 C   s   t j||| d< d S r   )r   r=   hdivr>   r   r   r   simple_hdiv_scalark   s    rK   c                 C   s:   t d}|| jk r6|| }|| }t j||| |< d S r   )r   r    sizer=   rJ   )r   Zarray_aZarray_br   r9   r:   r   r   r   simple_hdiv_kernelo   s
    

rM   c                 C   s   t j|d | d< d S r   r   r=   Zhnegr   r9   r   r   r   simple_hnegw   s    rP   c                 C   s   t j|| d< d S r   rN   rO   r   r   r   simple_hneg_scalar{   s    rQ   c                 C   s   t j|d | d< d S r   r   r=   ZhabsrO   r   r   r   simple_habs   s    rS   c                 C   s   t j|| d< d S r   rR   rO   r   r   r   simple_habs_scalar   s    rT   c                 C   s   t j||| d< d S r   )r   r=   Zheqr>   r   r   r   simple_heq_scalar   s    rU   c                 C   s   t j||| d< d S r   )r   r=   hner>   r   r   r   simple_hne_scalar   s    rW   c                 C   s   t j||| d< d S r   )r   r=   hger>   r   r   r   simple_hge_scalar   s    rY   c                 C   s   t j||| d< d S r   )r   r=   Zhgtr>   r   r   r   simple_hgt_scalar   s    rZ   c                 C   s   t j||| d< d S r   )r   r=   hler>   r   r   r   simple_hle_scalar   s    r\   c                 C   s   t j||| d< d S r   r   r=   hltr>   r   r   r   simple_hlt_scalar   s    r_   T)Zdevicec                 C   s   t j| |S r   r]   r   r   r   r   r   
hlt_func_1   s    ra   c                 C   s   t j| |S r   r]   r`   r   r   r   
hlt_func_2   s    rb   c                 C   s   t ||ot||| d< d S r   )ra   rb   rr9   r:   r-   r   r   r   test_multiple_hcmp_1   s    re   c                 C   s    t ||otj||| d< d S r   )ra   r   r=   r^   rc   r   r   r   test_multiple_hcmp_2   s    rf   c                 C   s    t ||otj||| d< d S r   )ra   r   r=   rX   rc   r   r   r   test_multiple_hcmp_3   s    rg   c                 C   s$   t j||ot j||| d< d S r   r]   rc   r   r   r   test_multiple_hcmp_4   s    rh   c                 C   s$   t j||ot j||| d< d S r   )r   r=   r^   rX   rc   r   r   r   test_multiple_hcmp_5   s    ri   c                 C   s   t j||| d< d S r   )r   r=   Zhmaxr>   r   r   r   simple_hmax_scalar   s    rj   c                 C   s   t j||| d< d S r   )r   r=   Zhminr>   r   r   r   simple_hmin_scalar   s    rk   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    lenr=   Zhsinrd   r   r   r   r   r   simple_hsin   s    
rn   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhcosrm   r   r   r   simple_hcos   s    
ro   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhlogrm   r   r   r   simple_hlog   s    
rp   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhlog2rm   r   r   r   simple_hlog2   s    
rq   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhlog10rm   r   r   r   simple_hlog10   s    
rr   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhexprm   r   r   r   simple_hexp   s    
rs   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhexp2rm   r   r   r   simple_hexp2   s    
rt   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhsqrtrm   r   r   r   simple_hsqrt   s    
ru   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhrsqrtrm   r   r   r   simple_hrsqrt  s    
rv   c                 C   s   | d S )Ng      r   )r   dtyper   r   r   numpy_hrsqrt
  s    rx   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhceilrm   r   r   r   simple_hceil  s    
ry   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhfloorrm   r   r   r   simple_hfloor  s    
rz   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhrcprm   r   r   r   simple_hrcp  s    
r{   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhtruncrm   r   r   r   simple_htrunc#  s    
r|   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhrintrm   r   r   r   simple_hrint*  s    
r}   c                 C   s   t || d< d S r   )r   ZcbrtrO   r   r   r   simple_cbrt1  s    r~   c                 C   s   t || d< d S r   )r   Zbrevr5   r   r   r   simple_brev5  s    r   c                 C   s   t || d< d S r   )r   Zclzr5   r   r   r   
simple_clz9  s    r   c                 C   s   t || d< d S r   )r   Zffsr5   r   r   r   
simple_ffs=  s    r   c                 C   s   t || d< d S r   roundr5   r   r   r   simple_roundA  s    r   c                 C   s   t ||| d< d S r   r   )r   r-   ndigitsr   r   r   simple_round_toE  s    r   c                 C   sF   t d}| | dkr:|d dkr0|| | |< qBd| |< nd| |< d S )Nr      r#   r         r   )r9   r:   r-   r   r   r   r   branching_with_ifsI  s    

r   c                 C   sB   t d}t |d dk|| d}t | | dk|d| |< d S )Nr   r#   r   r   r   r   )r   r    selp)r9   r:   r-   r   innerr   r   r   branching_with_selpsU  s    
r   c                 C   s   t d}t j| |< d S r   )r   r    Zlaneidr   r   r   r   simple_laneid\  s    
r   c                 C   s   t j| d< d S r   )r   Zwarpsize)r   r   r   r   simple_warpsizea  s    r   c                 C   s   t |  d S r   r   r   r   r   r   nonliteral_gride  s    r   c                 C   s   t |  d S r   )r   r&   r   r   r   r   nonliteral_gridsizei  s    r   c                       s  e Zd Z fddZdd Zdd Zdd Zed	d
d Zed	dd Z	dd Z
dd Zdd Zeddd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zed'd( Zed)d* Zed+d,d- Zed.d/ Zed0d1 Zed+d2d3 Zed4d5 Zed6d7 Zed+d8d9 Zed:d; Z ed<d= Z!ed+d>d? Z"ee#d@dAdB Z$ee#d@dCdD Z%edEdF Z&edGdH Z'ed+dIdJ Z(edKdL Z)edMdN Z*ed+dOdP Z+ee#d@dQdR Z,ee#d@dSdT Z-edUdV Z.edWdX Z/edYdZ Z0ed[d\ Z1d]d^ Z2d_d` Z3dadb Z4edcddde Z5dfdg Z6dhdi Z7djdk Z8dldm Z9edcdndo Z:dpdq Z;drds Z<dtdu Z=dvdw Z>edcdxdy Z?dzd{ Z@d|d} ZAd~d ZBdd ZCdd ZDeddd ZEdd ZFdd ZGeddd ZHdd ZI  ZJS )TestCudaIntrinsicc                    s   t    tjd d S r   )supersetUpnprandomseedself	__class__r   r   r   n  s    
zTestCudaIntrinsic.setUpc                 C   s@   t dt}tjdtjd}|d | | |d dk d S )Nvoid(int32[:])r   rw   r   r   r   )r   jitr   r   onesint32
assertTruer   compiledr   r   r   r   test_simple_threadidxr  s    z'TestCudaIntrinsic.test_simple_threadidxc                 C   sZ   t dt}d}tj|tjd}tj|tjd}|d|f | | t||k d S )Nr   
   r   r   )	r   r   r   r   r   r   aranger   all)r   r   Nr   expr   r   r   test_fill_threadidxx  s    z%TestCudaIntrinsic.test_fill_threadidxc                    sN   d\  fdd} fdd}| }| }|  t||k d S )N)r         c                     s>   t dt} tj ftjd}| d ff | |S )Nzvoid(int32[:,:,::1])r   r   )r   r   r   r   zerosr   r   r   XYZr   r   c_contigous  s    z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.c_contigousc                     sD   t dt} ttj ftjd}| d ff | |S )Nzvoid(int32[::1,:,:])r   r   )r   r   r   r   Zasfortranarrayr   r   r   r   r   r   f_contigous  s    z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.f_contigous)r   r   r   )r   r   r   Zc_resZf_resr   r   r   test_fill3d_threadidx  s    
z'TestCudaIntrinsic.test_fill3d_threadidxzCudasim does not check typesc              	   C   s*   |  td tdt W 5 Q R X d S NZRequireLiteralValuezvoid(int32))assertRaisesRegexr   r   r   r   r   r   r   r   test_nonliteral_grid_error  s    z,TestCudaIntrinsic.test_nonliteral_grid_errorc              	   C   s*   |  td tdt W 5 Q R X d S r   )r   r   r   r   r   r   r   r   r   test_nonliteral_gridsize_error  s    z0TestCudaIntrinsic.test_nonliteral_gridsize_errorc                 C   s\   t dt}d\}}|| }tj|tjd}|||f | | t|t|k d S )Nvoid(int32[::1])r      r   )	r   r   r!   r   emptyr   r   r   r   )r   r   ntidnctaidZnelemr   r   r   r   test_simple_grid1d  s    z$TestCudaIntrinsic.test_simple_grid1dc           	      C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}| }|||f | t|jd D ](}t|jd D ]}|| |||f< q~ql| 	t
||k d S Nzvoid(int32[:,::1])r   r   r   r   r   r   r   )r   r   r$   r   r   r   copyr,   r+   r   r   )	r   r   r   r   r+   r   r   r   r   r   r   r   test_simple_grid2d  s     z$TestCudaIntrinsic.test_simple_grid2dc                 C   sN   t dt}d\}}tjdtjd}|||f | | |d ||  d S )Nr   r   r   r   r   )r   r   r'   r   r   r   assertEqualr   r   r   r   r   r   r   r   test_simple_gridsize1d  s
    z(TestCudaIntrinsic.test_simple_gridsize1dzTests PTX emissionc           
      C   s  t d d  t t d d  f}t|t}t|t}d}d}tjddtj d}| }d|d d< tj|tj d}||df ||| |	|}	| 
d	ttd
|	 tjj||dd tj|tj d}||df ||| |	|}	| 
dttd
|	 tjj||dd d S )N    r      )r+   Z
fill_valuerw   r   r   r   r   r#   z	\s+bra\s+Z	branching)err_msgr   r   )r   r   r   r   r   r   fullr   r   Zinspect_asmr   rl   refindalltestingZassert_array_equal)
r   sigZcu_branching_with_ifsZcu_branching_with_selpsnr:   r-   expectedr9   ptxr   r   r   	test_selp  s$    

zTestCudaIntrinsic.test_selpc                 C   sr   t dt}d}d}tjdtjd}|||f | | |d |d |d   | |d |d |d   d S )Nr   r   r   r#   r   r   r   )r   r   r(   r   r   r   r   r   r   r   r   test_simple_gridsize2d  s    z(TestCudaIntrinsic.test_simple_gridsize2dc              	   C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}|||f | |\}}|j\}}	tt|d t|d D ]j\}
}||
 ||  }}t||	|D ]B}t|||D ]0}| 	|||f || k|||f || f qqqd S r   )
r   r   r4   r   r   r   r+   zipr,   r   )r   r   r   r   r+   r   r0   r1   r2   r3   r   r   r.   r/   r   r   r   r   r   test_intrinsic_forloop_step  s     
"z-TestCudaIntrinsic.test_intrinsic_forloop_stepc                 C   sF   t jdd }tjdtjdddd}|d | tj|d d S )Nc                 S   s:   t d\}}}t d\}}}|| | | |||f< d S Nr   r%   )outr   r   r   r9   r:   r-   r   r   r   foo  s    z*TestCudaIntrinsic.test_3dgrid.<locals>.fooi  r   	   )r   r   r   r   )r   r   r   r   r   reshaper   Zassert_equal)r   r   arrr   r   r   test_3dgrid  s
    
zTestCudaIntrinsic.test_3dgridc                 C   sZ   t jdd }d\}}}tj|| | tjd|||}|d | | t| d S )Nc           	      S   s   t d\}}}t d\}}}|t jjt jjt jj  kor|t jjt jjt jj  kor|t jjt jjt jj  k}|t jjt j	j ko|t jjt j	j ko|t jjt j	j k}|o|| |||f< d S r   )
r   r    r&   r   r   ZblockIdxr*   r   r   r)   )	r   r   r   r   r9   r:   r-   Zgrid_is_rightZgridsize_is_rightr   r   r   r     s    z,TestCudaIntrinsic.test_3dgrid_2.<locals>.foo)   r      r   ))r   r   r#   )r   r#   r   )r   r   r   r   Zbool_r   r   r   )r   r   r   r   r   r   r   r   r   test_3dgrid_2  s    

"zTestCudaIntrinsic.test_3dgrid_2c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], uint32)r   r   r      r   r   r   r   r6   r   r   r   assertEqualsr   r   r   r   test_popc_u4  s    zTestCudaIntrinsic.test_popc_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(int32[:], uint64)r   r   r   l        @ r   r   r   r   r   r   r   test_popc_u8  s    zTestCudaIntrinsic.test_popc_u8c                 C   sF   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f4[:], f4, f4, f4)r   r   r          @      @      @r   r   )r   r   r;   r   r   float32r   assert_allcloser   r   r   r   test_fma_f4  s    zTestCudaIntrinsic.test_fma_f4c                 C   sF   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f8[:], f8, f8, f8)r   r   r   r   r   r   r   r   )r   r   r;   r   r   float64r   r   r   r   r   r   test_fma_f8"  s    zTestCudaIntrinsic.test_fma_f8c                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S Nvoid(f2[:], f2[:], f2[:])r   r   r   r   r   r   )	r   r   r?   r   r   float16arrayr   r   r   r   r   arg1arg2r   r   r   	test_hadd(  s    zTestCudaIntrinsic.test_haddc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S )Nvoid(f2[:], f2, f2)r   r   JM!	@r   r   r   )r   r   r@   r   r   r   r   r   r   r   r   r   r   refr   r   r   test_hadd_scalar1  s    

z"TestCudaIntrinsic.test_hadd_scalarz(Compilation unsupported in the simulatorc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   cczadd.f16)r   r   r@   assertInr   argsr   _r   r   r   test_hadd_ptx;  s    zTestCudaIntrinsic.test_hadd_ptxc                 C   s   t dt}tjdtjd}tjdgtjd}tjdgtjd}tjdgtjd}|d |||| tj|d || |  d S )	Nz void(f2[:], f2[:], f2[:], f2[:])r   r   r   r   r   r   r   )	r   r   rB   r   r   r   r   r   r   )r   r   r   r   r   arg3r   r   r   	test_hfmaA  s    zTestCudaIntrinsic.test_hfmac                 C   sp   t dt}tjdtjd}td}td}td}|d |||| || | }tj|d | d S )	Nzvoid(f2[:], f2, f2, f2)r   r   r   r   r   r   r   )r   r   rC   r   r   r   r   r   )r   r   r   r   r   r  r   r   r   r   test_hfma_scalarK  s    


z"TestCudaIntrinsic.test_hfma_scalarc                 C   s6   t d d  t t t f}tt|dd\}}| d| d S )Nr   r  z
fma.rn.f16)r   r   rC   r  r  r   r   r   test_hfma_ptxV  s    zTestCudaIntrinsic.test_hfma_ptxc                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S r   )	r   r   rE   r   r   r   r   r   r   r   r   r   r   	test_hsub\  s    zTestCudaIntrinsic.test_hsubc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S Nr   r   r   r   gQ?r   r   )r   r   rF   r   r   r   r   r   r   r   r   r   test_hsub_scalare  s    

z"TestCudaIntrinsic.test_hsub_scalarc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r  zsub.f16)r   r   rF   r  r  r   r   r   test_hsub_ptxo  s    zTestCudaIntrinsic.test_hsub_ptxc                 C   sj   t  t}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S )Nr   r   r   r   r   r   )	r   r   rH   r   r   r   r   r   r   r   r   r   r   	test_hmulu  s    zTestCudaIntrinsic.test_hmulc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S r  )r   r   rI   r   r   r   r   r   r   r   r   r   test_hmul_scalar~  s    

z"TestCudaIntrinsic.test_hmul_scalarc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r  zmul.f16)r   r   rI   r  r  r   r   r   test_hmul_ptx  s    zTestCudaIntrinsic.test_hmul_ptxzNVIDIA Binding needed for NVRTCc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S r  )r   r   rK   r   r   r   r   r   r   r   r   r   test_hdiv_scalar  s    

z"TestCudaIntrinsic.test_hdiv_scalarc                 C   s   t dt}tjjddddtj}tjjddddtj}tj|tjd}|	|j
||| || }tj|| d S )Nr   i    i  rL   r   )r   r   rM   r   r   randintastyper   
zeros_likeforallrL   r   r   )r   r   Zarry1Zarry2r   r   r   r   r   	test_hdiv  s    zTestCudaIntrinsic.test_hdivc                 C   sV   t dt}tjdtjd}tjdgtjd}|d || tj|d |  d S )Nvoid(f2[:], f2[:])r   r   r   r   r   )	r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   	test_hneg  s
    zTestCudaIntrinsic.test_hnegc                 C   sR   t dt}tjdtjd}td}|d || | }tj|d | d S )Nvoid(f2[:], f2)r   r   r   r   r   )r   r   rQ   r   r   r   r   r   r   r   r   r   r   r   r   r   test_hneg_scalar  s    
z"TestCudaIntrinsic.test_hneg_scalarc                 C   s2   t d d  t f}tt|dd\}}| d| d S )Nr   r  zneg.f16)r   r   rQ   r  r  r   r   r   test_hneg_ptx  s    zTestCudaIntrinsic.test_hneg_ptxc                 C   sV   t  t}tjdtjd}tjdgtjd}|d || tj|d t	| d S )Nr   r         r   r   )
r   r   rS   r   r   r   r   r   r   absr  r   r   r   	test_habs  s
    zTestCudaIntrinsic.test_habsc                 C   sT   t dt}tjdtjd}td}|d || t|}tj|d | d S )Nr  r   r   gJM!	r   r   )	r   r   rT   r   r   r   r#  r   r   r  r   r   r   test_habs_scalar  s    
z"TestCudaIntrinsic.test_habs_scalarc                 C   s2   t d d  t f}tt|dd\}}| d| d S )Nr   r  zabs.f16)r   r   rT   r  r  r   r   r   test_habs_ptx  s    zTestCudaIntrinsic.test_habs_ptxc                 C   s  t ttttttttt	t
tf}ttf}tjtjtjtjtjtjtjtjtjtjtjtf}tjtjf}d}tjd tjjdd|d tj!}t"|}t#||D ]\\}}	| j$|	dB t%&d|}|d|f || |	|tj!d}
tj'(||
 W 5 Q R X qtjjdd|d tj!}t#||D ]^\}}	| j$|	dB t%&d|}|d|f || |	|tj!d}
tj'(||
 W 5 Q R X q"d S )	Nr   r   r  r  fnr  r   r   ))rn   ro   rp   rq   rr   ru   ry   rz   r{   r|   r}   rv   rs   rt   r   sincosloglog2log10sqrtceilfloorZ
reciprocaltruncZrintrx   r   Zexp2r   r   r  r  r   r  r   subTestr   r   r   r   )r   ZkernelsZexp_kernelsZexpected_functionsZexpected_exp_functionsr   r   rd   kernelr(  r   Zx2r   r   r   test_fp16_intrinsics_common  sV                
z-TestCudaIntrinsic.test_fp16_intrinsics_commonc                 C   sf   t  dd }d}tjd tj|tj}t|}|d|f || tj	
|d|  d S )Nc                 S   s.   t d}|t| k r*t j|| | |< d S r   )r   r    rl   r=   Zhexp10rm   r   r   r   hexp10_vectors  s    
z5TestCudaIntrinsic.test_hexp10.<locals>.hexp10_vectorsr   r   r   )r   r   r   r   r   Zrandr  r   r  r   r   )r   r5  r   r   rd   r   r   r   test_hexp10  s    

zTestCudaIntrinsic.test_hexp10c              
   C   s&  t tttttf}tjtjtj	tj
tjtjf}t||D ]\}}| j|d td|}tjdtjd}tjdtjd}td}td}	td}
|d ||	|	 ||	|	}| ||d	  |d ||	|
 ||	|
}| ||d	  |d ||	| ||	|}| ||d	  W 5 Q R X q6d S )
N)opzvoid(b1[:], f2, f2)r   r   r#   r   r   r   r   )rU   rW   rY   rZ   r\   r_   operatoreqnegegtleltr   r2  r   r   r   r   bool8r   r   )r   fnsZopsr(  r7  r3  r   gotr   r  Zarg4r   r   r   test_fp16_comparison  s6        





z&TestCudaIntrinsic.test_fp16_comparisonc              
   C   s   t ttttf}|D ]x}| j|db td|}tj	dtj
d}td}td}td}|d |||| | |d	  W 5 Q R X qd S )
Nr'  zvoid(b1[:], f2, f2, f2)r   r   r   r   r   r   r   )re   rf   rg   rh   ri   r2  r   r   r   r   r?  r   r   )r   Z	functionsr(  r   r   r   r   r  r   r   r   !test_multiple_float16_comparisons+  s    


z3TestCudaIntrinsic.test_multiple_float16_comparisonsc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S 	Nr   r   r   r   r   r   r   g      @)r   r   rj   r   r   r   r   r   r   r   r   r   	test_hmax<  s    


zTestCudaIntrinsic.test_hmaxc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S rD  )r   r   rk   r   r   r   r   r   r   r   r   r   	test_hminH  s    


zTestCudaIntrinsic.test_hminc                 C   sJ   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float32[:], float32)r   r   r   r   r   UUUUUU?)r   r   r~   r   r   r   r   r   r   r   r   Zcbrt_argr   r   r   test_cbrt_f32T  s
    zTestCudaIntrinsic.test_cbrt_f32c                 C   sJ   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float64[:], float64)r   r   g      @r   r   rG  )r   r   r~   r   r   r   r   r   rH  r   r   r   test_cbrt_f64[  s
    zTestCudaIntrinsic.test_cbrt_f64c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(uint32[:], uint32)r   r   r   i0  r   i  )r   r   r   r   r   Zuint32r   r   r   r   r   test_brev_u4b  s    zTestCudaIntrinsic.test_brev_u4z.only get given a Python "int", assumes 32 bitsc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(uint64[:], uint64)r   r   r   l   0  C r   l       `x)r   r   r   r   r   Zuint64r   r   r   r   r   test_brev_u8h  s    zTestCudaIntrinsic.test_brev_u8c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], int32)r   r   r      r      r   r   r   r   r   r   r   r   r   r   r   test_clz_i4o  s    zTestCudaIntrinsic.test_clz_i4c                 C   s@   t dt}tjdtjd}|d |d | |d d dS )	a  
        Although the CUDA Math API
        (http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html)
        only says int32 & int64 arguments are supported in C code, the LLVM
        IR input supports i8, i16, i32 & i64 (LLVM doesn't have a concept of
        unsigned integers, just unsigned operations on integers).
        http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics
        r   r   r   r   rN  r   rO  NrP  r   r   r   r   test_clz_u4u  s    	zTestCudaIntrinsic.test_clz_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S NrM  r   r   r   l    r   rP  r   r   r   r   test_clz_i4_1s  s    z TestCudaIntrinsic.test_clz_i4_1sc                 C   sB   t dt}tjdtjd}|d |d | |d dd d S )NrM  r   r   r   r   r   CUDA semanticsrP  r   r   r   r   test_clz_i4_0s  s    z TestCudaIntrinsic.test_clz_i4_0sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], int64)r   r   r      r   /   rP  r   r   r   r   test_clz_i8  s    zTestCudaIntrinsic.test_clz_i8c                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
NrM  r   r   r   rN  r              r   r   r   r   r   r   r   r   r   r   r   r   test_ffs_i4  s    zTestCudaIntrinsic.test_ffs_i4c                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
Nr   r   r   r   rN  r   r[  r\  r   r]  r   r   r   r   test_ffs_u4  s    zTestCudaIntrinsic.test_ffs_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S rS  r]  r   r   r   r   test_ffs_i4_1s  s    z TestCudaIntrinsic.test_ffs_i4_1sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )NrM  r   r   r   r   r]  r   r   r   r   test_ffs_i4_0s  s    z TestCudaIntrinsic.test_ffs_i4_0sc                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
NrW  r   r   r   rX  r   r   l        !   r]  r   r   r   r   test_ffs_i8  s    zTestCudaIntrinsic.test_ffs_i8c                 C   sj   t dt}d}tj|d tjd}ttjdtjd|}|d|d f | | t	||k d S )Nr   r#   r   r   r   )
r   r   r   r   r   r   Ztiler   r   r   )r   r   countr   r   r   r   r   test_simple_laneid  s    z$TestCudaIntrinsic.test_simple_laneidc                 C   s@   t dt}tjdtjd}|d | | |d dd d S )Nr   r   r   r   r   r   rU  )r   r   r   r   r   r   r   r   r   r   r   test_simple_warpsize  s    z&TestCudaIntrinsic.test_simple_warpsizec                 C   sN   t dt}tjdtjd}dD ]&}|d || | |d t| q"d S )Nzvoid(int64[:], float32)r   r   r"  g      g      g      g      ?g      @g      @g      @r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   test_round_f4  s
    zTestCudaIntrinsic.test_round_f4c                 C   sN   t dt}tjdtjd}dD ]&}|d || | |d t| q"d S )Nzvoid(int64[:], float64)r   r   rg  r   r   rh  ri  r   r   r   test_round_f8  s
    zTestCudaIntrinsic.test_round_f8c              
   C   s   t dt}tjdtjd}tjd tjdtj}t	|t
tjtj tjgf d}t||D ]L\}}| j||d0 |d ||| | j|d	 t||d
d W 5 Q R X qpd S )N void(float32[:], float32, int32)r   r   {   r   )r   r   r#   r   r   r   r   valr   r   r   singleprec)r   r   r   r   r   r   r   r   r  concatenater   infnan	itertoolsproductr2  assertPreciseEqualr   r   r   r   valsdigitsrt  r   r   r   r   test_round_to_f4  s    "	z"TestCudaIntrinsic.test_round_to_f4z$Overflow behavior differs on CPythonc                 C   sT   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )Nrl  r   r   i,  r   r   )	r   r   r   r   r   r   finfomaxr   r   r   r   rt  r   r   r   r   test_round_to_f4_overflow  s    z+TestCudaIntrinsic.test_round_to_f4_overflowc                 C   sT   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
Nrl  r   r   gQ?r   r   r   ru  rv  )r   r   r   r   r   r   r}  r   r  r   r   r   test_round_to_f4_halfway  s    z*TestCudaIntrinsic.test_round_to_f4_halfwayc              
   C   s  t dt}tjdtjd}tjd tjd}t|t	tj
tj
 tjgf d}t||D ]L\}}| j||d0 |d ||| | j|d	 t||d
d W 5 Q R X qhd}d}| j||d0 |d ||| | j|d	 t||dd W 5 Q R X d S )N void(float64[:], float64, int32)r   r   rm  r   )rn  ro  rp  rq  rr  r   r   r#   r   r   r   rs  r   r   exactrv  g`8p=<   double)r   r   r   r   r   r   r   r   rx  r   ry  rz  r{  r|  r2  r}  r   r~  r   r   r   test_round_to_f8  s&    "z"TestCudaIntrinsic.test_round_to_f8c                 C   sT   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )Nr  r   r   r   r   r   )	r   r   r   r   r   r   r  r  r   r  r   r   r   test_round_to_f8_overflow"  s    z+TestCudaIntrinsic.test_round_to_f8_overflowc                 C   sT   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
Nr  r   r   g\(\?r   r   r   r  rv  )r   r   r   r   r   r   r}  r   r  r   r   r   test_round_to_f8_halfway/  s    z*TestCudaIntrinsic.test_round_to_f8_halfway)K__name__
__module____qualname__r   r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   r   r  r	  r
  r  r  r  r  r  r  r  r   r  r  r  r   r!  r$  r%  r&  r4  r6  rB  rC  rE  rF  rI  rJ  rK  rL  rQ  rR  rT  rV  rZ  r^  r_  r`  ra  rc  re  rf  rj  rk  r  r  r  r  r  r  __classcell__r   r   r   r   r   m  s   





	

	




	


	
	






"









r   __main__)Zr{  numpyr   r8  r   Znumbar   r   Z
numba.cudar   Znumba.core.errorsr   Znumba.core.typesr   Znumba.cuda.testingr   r   r	   r
   r   r   r   r   r!   r$   r'   r(   r4   r6   r;   r?   r@   rB   rC   rE   rF   rH   rI   rK   rM   rP   rQ   rS   rT   rU   rW   rY   rZ   r\   r_   r   ra   rb   re   rf   rg   rh   ri   rj   rk   rn   ro   rp   rq   rr   rs   rt   ru   rv   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r  mainr   r   r   r   <module>   s   



     S
