nvrtc_kernel Derived Type

type, public :: nvrtc_kernel

nvRTC Compiled kernel class


Inherits

Dependency graph of nvrtc_kernel (edges are labelled with the component names that hold the referenced type):

  • nvrtc_kernel → c_ptr: device_pointers
  • nvrtc_kernel → CUfunction: cuda_kernel
  • nvrtc_kernel → dim3: blocks, threads
  • nvrtc_kernel → kernel_type_t: kernel_type
  • nvrtc_kernel → kernelArgs: kernelParams
  • CUfunction → c_ptr: ptr
  • kernelArgs → c_ptr: ptrs

Inherited by

Graph of types that depend on nvrtc_kernel (edges are labelled with the component names that hold the referenced type; unlabelled edges presumably denote type extension):

  • abstract_backend → nvrtc_kernel: unpack_kernel, unpack_kernel2
  • transpose_handle_cuda → nvrtc_kernel: transpose_kernel, unpack_kernel, unpack_kernel2
  • transpose_handle_cuda → abstract_backend: comm_handle
  • backend_cufftmp → abstract_backend (unlabelled)
  • backend_mpi → abstract_backend (unlabelled)
  • backend_nccl → abstract_backend (unlabelled)
  • transpose_plan_cuda → transpose_handle_cuda: fplans, bplans

Components

Type Visibility Attributes Name Initial
logical, private :: is_created = .false.

Flag indicating that the kernel has been created.

logical, private :: is_dummy = .false.

Dummy-kernel flag: indicates whether the kernel should do anything or not.

type(CUfunction), private :: cuda_kernel

Pointer to CUDA kernel.

type(dim3), private :: blocks

Grid of blocks.

type(dim3), private :: threads

Thread block.

type(kernel_type_t), private :: kernel_type

Type of kernel to execute.

type(kernelArgs), private :: kernelParams

Kernel arguments.

integer(kind=int32), private, allocatable :: pointers(:,:)

Optional pointers that hold information about counts and displacements for the KERNEL_UNPACK_PIPELINED kernel.

type(c_ptr), private :: device_pointers(3)

Device pointers for kernel arguments.

logical, private :: has_device_pointers

Flag indicating whether device pointers are present.

integer(kind=int64), private :: copy_bytes

Number of bytes to copy for the KERNEL_UNPACK_SIMPLE_COPY kernel.


Type-Bound Procedures

procedure, public, pass(self) :: create

Creates kernel

  • private subroutine create(self, comm, dims, effort, base_storage, transpose_type, kernel_type, pointers, force_effort)

    Creates kernel

    Arguments

    Type Intent Optional Attributes Name
    class(nvrtc_kernel), intent(inout) :: self

    nvRTC Compiled kernel class

    type(MPI_Comm), intent(in) :: comm

    MPI Communicator

    integer(kind=int32), intent(in) :: dims(:)

    Local dimensions to process

    type(dtfft_effort_t), intent(in) :: effort

    Effort level for generating transpose kernels

    integer(kind=int64), intent(in) :: base_storage

    Number of bytes needed to store a single element

    type(dtfft_transpose_t), intent(in) :: transpose_type

    Type of transposition to perform

    type(kernel_type_t), intent(in) :: kernel_type

    Type of kernel to build

    integer(kind=int32), intent(in), optional :: pointers(:,:)

    Optional pointers to unpack kernels

    logical, intent(in), optional :: force_effort

    Whether the effort level should be forced or not.

procedure, public, pass(self) :: execute

Executes kernel

  • private subroutine execute(self, in, out, stream, source)

    Executes kernel on stream

    Arguments

    Type Intent Optional Attributes Name
    class(nvrtc_kernel), intent(inout) :: self

    nvRTC Compiled kernel class

    real(kind=real32), intent(in), target :: in(:)

    Source pointer

    real(kind=real32), intent(in), target :: out(:)

    Target pointer

    type(dtfft_stream_t), intent(in) :: stream

    CUDA Stream

    integer(kind=int32), intent(in), optional :: source

    Source rank for pipelined unpacking

procedure, public, pass(self) :: destroy

Destroys kernel

  • private subroutine destroy(self)

    Destroys kernel

    Arguments

    Type Intent Optional Attributes Name
    class(nvrtc_kernel), intent(inout) :: self

    nvRTC Compiled kernel class