U
    hdf;                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZ d dlmZmZmZ d dlm Z  d dl!m"Z" G dd de	jZ#e $de j%Z&G dd deZ'G dd deZ(dS )    N)cached_property)ir)typingtypes	debuginfoitanium_manglercgutils)
Dispatcher)NumbaInvalidConfigWarning)BaseContext)MinimalCallConv)	cmathdecl)	datamodel   )nvvm)codegen	nvvmutilsufuncs)cuda_data_manager)warnc                       s$   e Zd Zdd Z fddZ  ZS )CUDATypingContextc                 C   s   ddl m}m}m}m} ddlm}m} | |j	 | |j	 | |j	 | t
j	 | |j	 | |j	 | |j d S )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl
cffi_utils) r   r   r   r   numba.core.typingr   r   install_registryregistryr   Ztyping_registry)selfr   r   r   r   r   r    r"   J/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/numba/cuda/target.pyload_additional_registries   s    z,CUDATypingContext.load_additional_registriesc                    s   ddl m} t|trt||sz
|j}W nj tk
r   |jsHtd|j	 }d|d< |
dd|d< |
dd|d< ||j|}||_|}Y nX tt| |S )	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTZdevicedebugFopt)Znumba.cuda.dispatcherr%   
isinstancer	   Z_CUDATypingContext__dispatcherAttributeErrorZ_can_compile
ValueErrortargetoptionscopygetZpy_funcsuperr   resolve_value_type)r!   valr%   r+   Zdisp	__class__r"   r#   r/   %   s"    



z$CUDATypingContext.resolve_value_type)__name__
__module____qualname__r$   r/   __classcell__r"   r"   r1   r#   r      s   r   z	[^a-z0-9]c                       s   e Zd ZdZdZd* fdd	Zedd Zedd Zd	d
 Z	dd Z
dd Zdd Zedd Zedd Zedd ZdddddZd+ddZdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Z  ZS ),CUDATargetContextTcudac                    s    t  || ttj| _d S N)r.   __init__r   chainr   Zdefault_managerZdata_model_manager)r!   Z	typingctxtargetr1   r"   r#   r:   I   s    zCUDATargetContext.__init__c                 C   s*   t  jrtjS d}tt| tjS d S )Nz3debuginfo is not generated for CUDA toolkits < 11.2)r   NVVM	is_nvvm70r   	DIBuilderr   r
   ZDummyDIBuilder)r!   msgr"   r"   r#   r?   O   s
    
zCUDATargetContext.DIBuilderc                 C   s   dS )NFr"   r!   r"   r"   r#   enable_boundscheckX   s    z$CUDATargetContext.enable_boundscheckc                 C   s   | j |S r9   )_internal_codegenZ_create_empty_module)r!   namer"   r"   r#   create_module^   s    zCUDATargetContext.create_modulec                 C   s   t d| _d | _d S )Nznumba.cuda.jit)r   ZJITCUDACodegenrC   _target_datarA   r"   r"   r#   inita   s    zCUDATargetContext.initc                 C   s   ddl m}m}m} ddl m}m}m} ddl m}m} ddl m	}	 ddl
m}
 ddlm} ddlm} d	d
lm}m}m}m}m} ddlm} | |j | |
j | |j | |j | |	j | |j | |j d S )Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)cffiimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )ndarray)Znumba.cpythonrH   rI   rJ   rK   rL   rM   rN   rO   rP   Z
numba.miscrQ   Znumba.nprR   rS   r   rT   rU   rV   rW   r   Znumba.np.unsaferX   r   r    Zimpl_registry)r!   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   r   rX   r"   r"   r#   r$   e   s     z,CUDATargetContext.load_additional_registriesc                 C   s   | j S r9   )rC   rA   r"   r"   r#   r   }   s    zCUDATargetContext.codegenc                 C   s"   | j d krtt j| _ | j S r9   )rF   llZcreate_target_datar   r=   Zdata_layoutrA   r"   r"   r#   target_data   s    
zCUDATargetContext.target_datac                    s*   ddl m  d}t fdd|D }|S )z
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        r   r8   )Z	threadIdxZblockDimZblockIdxZgridDimZlaneidZwarpsizec                    s   g | ]}t  |fqS r"   )r   Module).0ncr[   r"   r#   
<listcomp>   s   z;CUDATargetContext.nonconst_module_attrs.<locals>.<listcomp>)Znumbar8   tuple)r!   Z	nonconstsZnonconsts_with_modr"   r[   r#   nonconst_module_attrs   s    z'CUDATargetContext.nonconst_module_attrsc                 C   s   t | S r9   )CUDACallConvrA   r"   r"   r#   	call_conv   s    zCUDATargetContext.call_convr"   Nabi_tagsuidc                C   s   t j||||dS )Nrd   )r   Zmangle)r!   rD   argtypesre   rf   r"   r"   r#   mangler   s    
zCUDATargetContext.manglerc	              	   C   sV   t j|jdd}	|  j|j d|	||d}
|
| | |
||	||||}|
|fS )a  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        fndesc:        The FunctionDescriptor of the source function.
        debug:         Whether to compile with debug.
        lineinfo:      Whether to emit line info.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        filename:      The source filename that the function is contained in.
        linenum:       The source line that the function is on.
        max_registers: The max_registers argument for the code library.
        cudapynsZ_kernel_)Z
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   Zcreate_libraryrD   Zadd_linking_librarygenerate_kernel_wrapper)r!   Zcodelibfndescr&   lineinforl   filenamelinenumrm   kernel_namelibrarywrapperr"   r"   r#   prepare_cuda_kernel   s"     

  z%CUDATargetContext.prepare_cuda_kernelc           $   
      s  |j }| |}	t|	j}
tt |
}| dttd| j	
tjg|
 }t||j}tj|jdd}t|| t d}|s|r|o| }| j|| |d}| ||j|| |||  fdd}|d	}g }g }d
D ](}||d|  ||d|  q|	| j}| j	||tj||\}}|rt||j |  W 5 Q R X | |!|j" t#|j$j%d}t&' j(r|)|||j*dd}|+|d}nPt|j$|j$|j$|j$g}d}tj||d}|,||||j*g}|-d||}t./|} | |^ t0d
|D ] \}!}"| 1|!}#|2|#|" q8t0d
|D ] \}!}"| 3|!}#|2|#|" qdW 5 Q R X W 5 Q R X |  t&4  |5 |s|r|6  |6  |7 j  S )z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped is described by ``fndesc``.
        The wrapper function is returned.
        zcuda.kernel.wrapper    ri   rj   r   )modulefilepathZcgctxdirectives_onlyc                    s4    j |  }ttd|}t|jjd |_|S )Nry   )	rD   r   add_global_variabler   IntTypeConstanttypepointeeinitializer)ZpostfixrD   gvZwrapfnZwrapper_moduler"   r#   define_error_gv   s    
zBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gvZ__errcode__Zxyzz	__tid%s__z__ctaid%s__N	monotonicr   Z___numba_atomic_i32_cas_hack)rD   z==)8rg   Zget_arg_packerlistZargument_typesr   FunctionTypeZVoidTyperE   r~   rc   Zget_return_typer   ZpyobjectFunctionro   r   rn   rD   Z	IRBuilderZappend_basic_blockr?   Zmark_subprogramargsZmark_locationappendZfrom_argumentsZcall_functionvoidr   Z	if_likelyZis_okZret_voidZif_thennot_Zis_python_excr   r   r   r   r=   r>   ZcmpxchgcodeZextract_valuecallZicmp_unsignedr   ZSRegBuilderziptidstoreZctaidZset_cuda_kernelZadd_ir_modulefinalizeZget_function)$r!   rv   rq   ru   r&   rr   rs   rt   rg   ZarginfoZargtysZwrapfntyZfntyfuncprefixedbuilderr|   r   r   Zgv_excZgv_tidZgv_ctaidiZcallargsstatus_oldZxchgchangedZcasfntyZcas_hackZcasfnZsregdimZptrr0   r"   r   r#   rp      s    



         


$

z)CUDATargetContext.generate_kernel_wrapperc              	      s   |j } fddt|jddD }ttdt|}t||}tj	}t
j||jd|d}	d|	_d	|	_||	_ |j}
 |
}d
|d   |	_ttd}||	|d} | |} fdd|jD } fdd|jD } j||||jj|||j|jdd | S )i
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        c                    s   g | ]}  tj|qS r"   )get_constantr   byte)r]   r   rA   r"   r#   r_   *  s   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>A)order   Z_cudapy_cmem	addrspaceinternalT   r   genericc                    s   g | ]}  tj|qS r"   r   r   Zintpr]   srA   r"   r#   r_   C  s     c                    s   g | ]}  tj|qS r"   r   r   rA   r"   r#   r_   D  s     N)datashapestridesitemsizeparentZmeminfo) rz   itertobytesr   	ArrayTyper~   lenr   r   ADDRSPACE_CONSTANTr   r}   r   linkageglobal_constantr   Zget_data_typeZdtypeZget_abi_sizeof
bit_lengthalignPointerTypeaddrspacecastZ
make_arrayr   r   Zpopulate_arraybitcastr   r   r   Z	_getvalue)r!   r   ZarytyZarrlmodZ	constvalsZ
constarytyZconstaryr   r   Zlldtyper   ZptrtyZgenptrZaryZkshapeZkstridesr"   rA   r#   make_constant_array"  s:    

 z%CUDATargetContext.make_constant_arrayc                 C   s   t |dd }ddt|g}|j|}|dkrdt j||j	|t
jd}d|_d|_||_|j	jj}||t
jS )	r   zutf-8    $Z__conststring__Nr   r   T)r   Zmake_bytearrayencodejoinr   Zmangle_identifierglobalsr-   r}   r   r   r   r   r   r   r   elementr   Z
as_pointer)r!   modstringtextrD   r   Zchartyr"   r"   r#   insert_const_stringM  s    
z%CUDATargetContext.insert_const_stringc                 C   s0   |j }| ||}ttd}|||dS )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        r   r   )rz   r   r   r   r~   r   )r!   r   r   r   r   Z	charptrtyr"   r"   r#   insert_string_const_addrspacec  s    z/CUDATargetContext.insert_string_const_addrspacec                 C   s   dS )zRun O1 function passes
        Nr"   )r!   r   r"   r"   r#   optimize_functiono  s    z#CUDATargetContext.optimize_functionc                 C   s
   t |S r9   )r   get_ufunc_info)r!   Z	ufunc_keyr"   r"   r#   r   |  s    z CUDATargetContext.get_ufunc_info)r8   )N)r3   r4   r5   Zimplement_powi_as_math_callZstrict_alignmentr:   propertyr?   rB   rE   rG   r$   r   rZ   r   ra   rc   rh   rx   rp   r   r   r   r   r   r6   r"   r"   r1   r#   r7   E   s4   




 
$b+r7   c                   @   s   e Zd ZdS )rb   N)r3   r4   r5   r"   r"   r"   r#   rb     s   rb   ))re	functoolsr   Zllvmlite.bindingZbindingrY   Zllvmliter   Z
numba.corer   r   r   r   r   Znumba.core.dispatcherr	   Znumba.core.errorsr
   Znumba.core.baser   Znumba.core.callconvr   r   r   r   Zcudadrvr   Z
numba.cudar   r   r   Znumba.cuda.modelsr   warningsr   r   compileIZVALID_CHARSr7   rb   r"   r"   r"   r#   <module>   s(   *  =