kernel_device Derived Type

type, public, extends(abstract_kernel) :: kernel_device

Device kernel class


Inherits

Inheritance graph for kernel_device (edge labels are component names):

  • kernel_device -> abstract_kernel (extends)
  • kernel_device -> CUfunction (component cuda_kernel)
  • kernel_device -> kernel_type_t (component internal_kernel_type)
  • abstract_kernel -> kernel_type_t (component kernel_type)
  • abstract_kernel -> abstract_compressor (component compressor)
  • abstract_kernel -> string (component kernel_string)
  • CUfunction -> c_ptr (component ptr)

Components

Type Visibility Attributes Name Initial
logical, public :: is_created = .false.

Kernel is created flag.

logical, public :: is_dummy = .false.

Whether the kernel is a dummy, i.e. it should not do any work when executed.

logical, public :: is_dummy_kernel = .false.

Whether the kernel is of type KERNEL_DUMMY.

logical, public :: is_dummy_compressed = .false.
type(kernel_type_t), public :: kernel_type

Type of the kernel

type(string), public :: kernel_string
integer(kind=int32), public, allocatable :: neighbor_data(:,:)

Neighbor data for pipelined unpacking

integer(kind=int32), public, allocatable :: dims(:)

Local dimensions to process

class(abstract_compressor), public, pointer :: compressor

Compressor pointer. The compressor itself is created by the generic handle and passed here.

logical, public :: is_compress

Enable compression

logical, public :: is_decompress

Enable decompression

integer(kind=int64), public :: base_storage
type(kernel_type_t), private :: internal_kernel_type

Actual kernel type used for execution, can be different from kernel_type

type(CUfunction), private :: cuda_kernel

Pointer to CUDA kernel.

integer(kind=int32), private :: tile_size

Tile size used for this kernel

integer(kind=int32), private :: block_rows

Number of rows in each block processed by each thread

integer(kind=int64), private :: copy_bytes

Number of bytes to copy for KERNEL_COPY kernel


Type-Bound Procedures

procedure, public, pass(self) :: create

Creates kernel

  • private subroutine create(self, dims, effort, base_storage, kernel_type, neighbor_data, force_effort, with_compression, with_decompression)

    Creates kernel

    Arguments

    Type Intent Optional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

    integer(kind=int32), intent(in) :: dims(:)

    Local dimensions to process

    type(dtfft_effort_t), intent(in) :: effort

    Effort level for generating transpose kernels

    integer(kind=int64), intent(in) :: base_storage

    Number of bytes needed to store single element

    type(kernel_type_t), intent(in) :: kernel_type

    Type of kernel to build

    integer(kind=int32), intent(in), optional :: neighbor_data(:,:)

    Optional neighbor data for pipelined unpacking kernels

    logical, intent(in), optional :: force_effort

    Should effort be forced or not

    logical, intent(in), optional :: with_compression

    Enable compression after executing kernel

    logical, intent(in), optional :: with_decompression

    Enable decompression before executing kernel

procedure, public, pass(self) :: execute

Executes kernel

  • private subroutine execute(self, in, out, stream, neighbor, aux, csize, csizes, skip_compression, skip_rank, sync)

    Executes kernel

    Arguments

    Type Intent Optional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

    type(c_ptr), intent(in) :: in

    Source buffer, can be device or host pointer

    type(c_ptr), intent(in) :: out

    Target buffer, can be device or host pointer

    type(dtfft_stream_t), intent(in) :: stream

    Stream to execute on, used only for device pointers

    integer(kind=int32), intent(in), optional :: neighbor

    Source rank for pipelined unpacking

    type(c_ptr), intent(in), optional :: aux

    Auxiliary buffer, can be device or host pointer

    integer(kind=int32), intent(inout), optional :: csize

    Compressed buffer size

    integer(kind=int32), intent(inout), optional :: csizes(:)

    Multiple compression sizes. This should only be used with CUDA backends

    logical, intent(in), optional :: skip_compression

    Skip compression/decompression stage. Should be used when packing/unpacking from itself.

    integer(kind=int32), intent(in), optional :: skip_rank

    Skip compression/decompression for specific rank when neighbor is not specified.

    logical, intent(in), optional :: sync

    Sync stream after packing/compression. Should be used only for fused backends

procedure, public, pass(self) :: destroy

Destroys kernel

  • private subroutine destroy(self)

    Destroys kernel

    Arguments

    Type Intent Optional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

procedure, public, pass(self) :: set_compressor

  • private subroutine set_compressor(self, compressor)

    Sets created compressor for the kernel

    Arguments

    Type Intent Optional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

    class(abstract_compressor), intent(in), target :: compressor

    Compressor to set

procedure, public :: create_private => create

Creates kernel

  • private subroutine create(self, effort, base_storage, force_effort)

    Creates kernel

    Arguments

    Type Intent Optional Attributes Name
    class(kernel_device), intent(inout) :: self

    Device kernel class

    type(dtfft_effort_t), intent(in) :: effort

    Effort level for generating transpose kernels

    integer(kind=int64), intent(in) :: base_storage

    Number of bytes needed to store single element

    logical, intent(in), optional :: force_effort

    Should effort be forced or not

procedure, public :: execute_private => execute

Executes kernel

  • private subroutine execute(self, in, out, stream, sync, neighbor)

    Executes kernel on stream

    Arguments

    Type Intent Optional Attributes Name
    class(kernel_device), intent(inout) :: self

    Device kernel class

    type(c_ptr), intent(in) :: in

    Source buffer, can be device or host pointer

    type(c_ptr), intent(in) :: out

    Target buffer, can be device or host pointer

    type(dtfft_stream_t), intent(in) :: stream

    Stream to execute on

    logical, intent(in) :: sync

    Sync stream after kernel execution, unused here

    integer(kind=int32), intent(in), optional :: neighbor

    Source rank for pipelined unpacking

procedure, public :: destroy_private => destroy

Destroys kernel

  • private subroutine destroy(self)

    Destroys kernel

    Arguments

    Type Intent Optional Attributes Name
    class(kernel_device), intent(inout) :: self

    Device kernel class