dtfft_nvrtc_kernel_cache Module


Uses

  • module~~dtfft_nvrtc_kernel_cache~~UsesGraph module~dtfft_nvrtc_kernel_cache dtfft_nvrtc_kernel_cache iso_c_binding iso_c_binding module~dtfft_nvrtc_kernel_cache->iso_c_binding iso_fortran_env iso_fortran_env module~dtfft_nvrtc_kernel_cache->iso_fortran_env module~dtfft_config dtfft_config module~dtfft_nvrtc_kernel_cache->module~dtfft_config module~dtfft_interface_cuda dtfft_interface_cuda module~dtfft_nvrtc_kernel_cache->module~dtfft_interface_cuda module~dtfft_interface_cuda_runtime dtfft_interface_cuda_runtime module~dtfft_nvrtc_kernel_cache->module~dtfft_interface_cuda_runtime module~dtfft_parameters dtfft_parameters module~dtfft_nvrtc_kernel_cache->module~dtfft_parameters module~dtfft_utils dtfft_utils module~dtfft_nvrtc_kernel_cache->module~dtfft_utils mpi_f08 mpi_f08 module~dtfft_nvrtc_kernel_cache->mpi_f08 module~dtfft_config->iso_c_binding module~dtfft_config->iso_fortran_env module~dtfft_config->module~dtfft_interface_cuda_runtime module~dtfft_config->module~dtfft_parameters module~dtfft_config->module~dtfft_utils module~dtfft_config->mpi_f08 module~dtfft_errors dtfft_errors module~dtfft_config->module~dtfft_errors module~dtfft_interface_cuda->iso_c_binding module~dtfft_interface_cuda->iso_fortran_env module~dtfft_interface_cuda->module~dtfft_parameters module~dtfft_interface_cuda->module~dtfft_utils module~dtfft_interface_cuda->module~dtfft_errors module~dtfft_interface_cuda_runtime->iso_c_binding module~dtfft_interface_cuda_runtime->module~dtfft_parameters module~dtfft_interface_cuda_runtime->module~dtfft_utils module~dtfft_parameters->iso_c_binding module~dtfft_parameters->iso_fortran_env module~dtfft_parameters->mpi_f08 module~dtfft_utils->iso_c_binding module~dtfft_utils->iso_fortran_env module~dtfft_utils->module~dtfft_parameters module~dtfft_utils->mpi_f08 module~dtfft_utils->module~dtfft_errors module~dtfft_errors->iso_fortran_env

Used by

  • module~~dtfft_nvrtc_kernel_cache~~UsedByGraph module~dtfft_nvrtc_kernel_cache dtfft_nvrtc_kernel_cache module~dtfft_nvrtc_kernel dtfft_nvrtc_kernel module~dtfft_nvrtc_kernel->module~dtfft_nvrtc_kernel_cache module~dtfft_transpose_plan_cuda dtfft_transpose_plan_cuda module~dtfft_transpose_plan_cuda->module~dtfft_nvrtc_kernel_cache module~dtfft_abstract_backend dtfft_abstract_backend module~dtfft_transpose_plan_cuda->module~dtfft_abstract_backend module~dtfft_abstract_transpose_plan dtfft_abstract_transpose_plan module~dtfft_transpose_plan_cuda->module~dtfft_abstract_transpose_plan module~dtfft_transpose_handle_cuda dtfft_transpose_handle_cuda module~dtfft_transpose_plan_cuda->module~dtfft_transpose_handle_cuda module~dtfft_abstract_backend->module~dtfft_nvrtc_kernel module~dtfft_abstract_transpose_plan->module~dtfft_nvrtc_kernel module~dtfft_abstract_transpose_plan->module~dtfft_abstract_backend module~dtfft_plan dtfft_plan module~dtfft_plan->module~dtfft_transpose_plan_cuda module~dtfft_plan->module~dtfft_abstract_transpose_plan module~dtfft_transpose_plan_host dtfft_transpose_plan_host module~dtfft_plan->module~dtfft_transpose_plan_host module~dtfft_transpose_handle_cuda->module~dtfft_nvrtc_kernel module~dtfft_transpose_handle_cuda->module~dtfft_abstract_backend module~dtfft_backend_cufftmp_m dtfft_backend_cufftmp_m module~dtfft_transpose_handle_cuda->module~dtfft_backend_cufftmp_m module~dtfft_backend_mpi dtfft_backend_mpi module~dtfft_transpose_handle_cuda->module~dtfft_backend_mpi module~dtfft_backend_nccl_m dtfft_backend_nccl_m module~dtfft_transpose_handle_cuda->module~dtfft_backend_nccl_m module~dtfft dtfft module~dtfft->module~dtfft_plan module~dtfft_api dtfft_api module~dtfft_api->module~dtfft_plan module~dtfft_backend_cufftmp_m->module~dtfft_abstract_backend module~dtfft_backend_mpi->module~dtfft_abstract_backend module~dtfft_backend_nccl_m->module~dtfft_abstract_backend module~dtfft_transpose_plan_host->module~dtfft_abstract_transpose_plan

Variables

Type Visibility Attributes Name Initial
type(nvrtc_cache), public, save :: cache

Cache of compiled kernels

integer(kind=int32), private, parameter :: CACHE_PREALLOC_SIZE = 10

Number of preallocated cache entries


Derived Types

type, private ::  nvrtc_cache_entry

Cache entry for a compiled kernel

Components

Type Visibility Attributes Name Initial
integer(kind=int32), private :: ref_count = 0

Number of references to this kernel

type(CUmodule), private :: cuda_module = CUmodule(c_null_ptr)

Pointer to CUDA Module.

type(CUfunction), private :: cuda_kernel = CUfunction(c_null_ptr)

Pointer to CUDA kernel.

type(kernel_type_t), private :: kernel_type

Type of kernel to execute.

type(dtfft_transpose_t), private :: transpose_type

Type of transpose

integer(kind=int32), private :: tile_size

Tile size of transpose kernel

integer(kind=int32), private :: padding

Padding size of transpose kernel

integer(kind=int64), private :: base_storage

Number of bytes needed to store single element

type, private ::  nvrtc_cache

Cache for compiled kernels

Components

Type Visibility Attributes Name Initial
logical, private :: is_created = .false.

Flag indicating if cache is created

type(nvrtc_cache_entry), private, allocatable :: cache(:)

Cache entries

integer(kind=int32), private :: size

Number of entries in cache

Type-Bound Procedures

procedure, public, pass(self) :: create ../../

Creates cache

procedure, public, pass(self) :: add ../../

Adds new entry to cache

procedure, public, pass(self) :: get ../../

Gets entry from cache

procedure, public, pass(self) :: remove ../../

Removes entry from cache

procedure, public, pass(self) :: cleanup ../../

Cleans up cache


Functions

private function get(self, transpose_type, kernel_type, base_storage, tile_size, padding) result(kernel)

Returns the cached kernel if it exists. If not, returns a null pointer.

Arguments

Type IntentOptional Attributes Name
class(nvrtc_cache), intent(inout) :: self

Cache instance

type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transposition to perform

type(kernel_type_t), intent(in) :: kernel_type

Type of kernel to build

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

integer(kind=int32), intent(in) :: tile_size

Tile size

integer(kind=int32), intent(in) :: padding

Padding

Return Value type(CUfunction)

Cached kernel

private function get_true_transpose_type(transpose_type) result(transpose_type_)

Returns generic transpose id. Since X-Y and Y-Z transpositions are symmetric, it returns only one of them. X-Z and Z-X are not symmetric.

Arguments

Type IntentOptional Attributes Name
type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transposition to perform

Return Value type(dtfft_transpose_t)

Fixed id of transposition


Subroutines

private subroutine create(self)

Creates cache

Arguments

Type IntentOptional Attributes Name
class(nvrtc_cache), intent(inout) :: self

Cache instance

private subroutine add(self, cuda_module, cuda_kernel, kernel_type, transpose_type, tile_size, padding, base_storage)

Adds new entry to cache

Arguments

Type IntentOptional Attributes Name
class(nvrtc_cache), intent(inout) :: self

Cache instance

type(CUmodule), intent(in) :: cuda_module

Compiled CUDA module

type(CUfunction), intent(in) :: cuda_kernel

Extracted CUDA kernel

type(kernel_type_t), intent(in) :: kernel_type

Kernel type

type(dtfft_transpose_t), intent(in) :: transpose_type

Transpose type

integer(kind=int32), intent(in) :: tile_size

Tile size

integer(kind=int32), intent(in) :: padding

Padding

integer(kind=int64), intent(in) :: base_storage

Base storage

private subroutine cleanup(self)

Removes unused modules from the CUDA context

Arguments

Type IntentOptional Attributes Name
class(nvrtc_cache), intent(inout) :: self

Cache instance

private subroutine remove(self, kernel)

Takes a CUDA kernel as an argument and searches for it in the cache. If the kernel is found, then reduces ref_count of that entry and the kernel becomes a null pointer.

Arguments

Type IntentOptional Attributes Name
class(nvrtc_cache), intent(inout) :: self

Cache instance

type(CUfunction), intent(inout) :: kernel

CUDA kernel to search for