kernel_host Derived Type

type, public, extends(abstract_kernel) :: kernel_host

Host kernel implementation


Inherits

Inheritance graph: kernel_host extends abstract_kernel and holds a component access_mode of type dtfft_access_mode_t. abstract_kernel in turn holds the components compressor (abstract_compressor), kernel_type (kernel_type_t) and kernel_string (string).

Components

Type Visibility Attributes Name Initial
logical, public :: is_created = .false.

Kernel is created flag.

logical, public :: is_dummy = .false.

Whether the kernel is a dummy, i.e. should do nothing when executed.

logical, public :: is_dummy_kernel = .false.

If kernel is of type KERNEL_DUMMY

logical, public :: is_dummy_compressed = .false.
type(kernel_type_t), public :: kernel_type

Type of the kernel

type(string), public :: kernel_string
integer(kind=int32), public, allocatable :: neighbor_data(:,:)

Neighbor data for pipelined unpacking

integer(kind=int32), public, allocatable :: dims(:)

Local dimensions to process

class(abstract_compressor), public, pointer :: compressor

Compressor pointer. Compressor itself is created by generic handle and passed here

logical, public :: is_compress

Enable compression

logical, public :: is_decompress

Enable decompression

integer(kind=int64), public :: base_storage
type(dtfft_access_mode_t), private :: access_mode

Access mode for kernel execution

procedure(execute_host_interface), private, pointer :: execute_impl => null()

Pointer to the execute implementation


Type-Bound Procedures

procedure, public, pass(self) :: create

Creates kernel

  • private subroutine create(self, dims, effort, base_storage, kernel_type, neighbor_data, force_effort, with_compression, with_decompression)

    Creates kernel

    Arguments

    Type IntentOptional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

    integer(kind=int32), intent(in) :: dims(:)

    Local dimensions to process

    type(dtfft_effort_t), intent(in) :: effort

    Effort level for generating transpose kernels

    integer(kind=int64), intent(in) :: base_storage

    Number of bytes needed to store a single element

    type(kernel_type_t), intent(in) :: kernel_type

    Type of kernel to build

    integer(kind=int32), intent(in), optional :: neighbor_data(:,:)

    Optional pointers for unpack kernels

    logical, intent(in), optional :: force_effort

    Should effort be forced or not

    logical, intent(in), optional :: with_compression

    Enable compression after executing kernel

    logical, intent(in), optional :: with_decompression

    Enable decompression before executing kernel

procedure, public, pass(self) :: execute

Executes kernel

  • private subroutine execute(self, in, out, stream, neighbor, aux, csize, csizes, skip_compression, skip_rank, sync)

    Executes kernel

    Arguments

    Type IntentOptional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

    type(c_ptr), intent(in) :: in

    Source buffer, can be device or host pointer

    type(c_ptr), intent(in) :: out

    Target buffer, can be device or host pointer

    type(dtfft_stream_t), intent(in) :: stream

    Stream to execute on, used only for device pointers

    integer(kind=int32), intent(in), optional :: neighbor

    Source rank for pipelined unpacking

    type(c_ptr), intent(in), optional :: aux

    Auxiliary buffer, can be device or host pointer

    integer(kind=int32), intent(inout), optional :: csize

    Compressed buffer size

    integer(kind=int32), intent(inout), optional :: csizes(:)

    Multiple compression sizes. This should only be used with CUDA backends

    logical, intent(in), optional :: skip_compression

    Skip compression/decompression stage. Should be used when packing/unpacking from itself.

    integer(kind=int32), intent(in), optional :: skip_rank

    Skip compression/decompression for specific rank when neighbor is not specified.

    logical, intent(in), optional :: sync

    Sync stream after packing/compression. Should be used only for fused backends

procedure, public, pass(self) :: destroy

Destroys kernel

  • private subroutine destroy(self)

    Destroys kernel

    Arguments

    Type IntentOptional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

procedure, public, pass(self) :: set_compressor

  • private subroutine set_compressor(self, compressor)

    Sets created compressor for the kernel

    Arguments

    Type IntentOptional Attributes Name
    class(abstract_kernel), intent(inout) :: self

    Abstract kernel

    class(abstract_compressor), intent(in), target :: compressor

    Compressor to set

procedure, public :: create_private => create_host

Creates kernel

  • private subroutine create_host(self, effort, base_storage, force_effort)

    Creates host kernel

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

    type(dtfft_effort_t), intent(in) :: effort

    Effort level for generating transpose kernels

    integer(kind=int64), intent(in) :: base_storage

    Number of bytes needed to store a single element

    logical, intent(in), optional :: force_effort

    Should effort be forced or not

procedure, public :: execute_private => execute_host

Executes kernel

  • private subroutine execute_host(self, in, out, stream, sync, neighbor)

    Executes host kernel

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

    type(c_ptr), intent(in) :: in

    Source host-allocated buffer

    type(c_ptr), intent(in) :: out

    Target host-allocated buffer

    type(dtfft_stream_t), intent(in) :: stream

    Stream to execute on, unused here

    logical, intent(in) :: sync

    Sync stream after kernel execution, unused here

    integer(kind=int32), intent(in), optional :: neighbor

    Source rank for pipelined unpacking

procedure, public :: destroy_private => destroy_host

Destroys kernel

  • private subroutine destroy_host(self)

    Destroys host kernel

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

procedure, public :: execute_benchmark

  • private subroutine execute_benchmark(self, in, out, n_warmup_iters, n_iters, execution_time)

    Executes benchmark for the given kernel

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

    type(c_ptr), intent(in) :: in

    Source host-allocated buffer

    type(c_ptr), intent(in) :: out

    Target host-allocated buffer

    integer(kind=int32), intent(in) :: n_warmup_iters

    Number of warmup iterations to perform before testing kernel

    integer(kind=int32), intent(in) :: n_iters

    Number of iterations to perform when testing kernel

    real(kind=real64), intent(out) :: execution_time

    Execution time of the selected access mode

procedure, public :: select_access_mode_f32

  • private subroutine select_access_mode_f32(self, in, out, n_warmup_iters, n_iters, execution_time)

    Selects the best access mode for host kernels, real(real32) version

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

    type(c_ptr), intent(in) :: in

    Source host-allocated buffer

    type(c_ptr), intent(in) :: out

    Target host-allocated buffer

    integer(kind=int32), intent(in) :: n_warmup_iters

    Number of warmup iterations to perform before testing kernel

    integer(kind=int32), intent(in) :: n_iters

    Number of iterations to perform when testing kernel

    real(kind=real64), intent(out) :: execution_time

    Execution time of the selected access mode

procedure, public :: select_access_mode_f64

  • private subroutine select_access_mode_f64(self, in, out, n_warmup_iters, n_iters, execution_time)

    Selects the best access mode for host kernels, real(real64) version

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

    type(c_ptr), intent(in) :: in

    Source host-allocated buffer

    type(c_ptr), intent(in) :: out

    Target host-allocated buffer

    integer(kind=int32), intent(in) :: n_warmup_iters

    Number of warmup iterations to perform before testing kernel

    integer(kind=int32), intent(in) :: n_iters

    Number of iterations to perform when testing kernel

    real(kind=real64), intent(out) :: execution_time

    Execution time of the selected access mode

procedure, public :: select_access_mode_f128

  • private subroutine select_access_mode_f128(self, in, out, n_warmup_iters, n_iters, execution_time)

    Selects the best access mode for host kernels, complex(real64) version

    Arguments

    Type IntentOptional Attributes Name
    class(kernel_host), intent(inout) :: self

    Host kernel class

    type(c_ptr), intent(in) :: in

    Source host-allocated buffer

    type(c_ptr), intent(in) :: out

    Target host-allocated buffer

    integer(kind=int32), intent(in) :: n_warmup_iters

    Number of warmup iterations to perform before testing kernel

    integer(kind=int32), intent(in) :: n_iters

    Number of iterations to perform when testing kernel

    real(kind=real64), intent(out) :: execution_time

    Execution time of the selected access mode