dtfft_nvrtc_module Module

Module for managing nvRTC compiled CUDA kernels. Each module has only one templated kernel that can be instantiated with different parameters.


Uses

  • module~~dtfft_nvrtc_module~~UsesGraph module~dtfft_nvrtc_module dtfft_nvrtc_module iso_c_binding iso_c_binding module~dtfft_nvrtc_module->iso_c_binding iso_fortran_env iso_fortran_env module~dtfft_nvrtc_module->iso_fortran_env module~dtfft_abstract_kernel dtfft_abstract_kernel module~dtfft_nvrtc_module->module~dtfft_abstract_kernel module~dtfft_config dtfft_config module~dtfft_nvrtc_module->module~dtfft_config module~dtfft_interface_cuda dtfft_interface_cuda module~dtfft_nvrtc_module->module~dtfft_interface_cuda module~dtfft_interface_cuda_runtime dtfft_interface_cuda_runtime module~dtfft_nvrtc_module->module~dtfft_interface_cuda_runtime module~dtfft_interface_nvrtc dtfft_interface_nvrtc module~dtfft_nvrtc_module->module~dtfft_interface_nvrtc module~dtfft_interface_nvtx dtfft_interface_nvtx module~dtfft_nvrtc_module->module~dtfft_interface_nvtx module~dtfft_nvrtc_block_optimizer dtfft_nvrtc_block_optimizer module~dtfft_nvrtc_module->module~dtfft_nvrtc_block_optimizer module~dtfft_parameters dtfft_parameters module~dtfft_nvrtc_module->module~dtfft_parameters module~dtfft_utils dtfft_utils module~dtfft_nvrtc_module->module~dtfft_utils mpi_f08 mpi_f08 module~dtfft_nvrtc_module->mpi_f08 module~dtfft_abstract_kernel->iso_fortran_env module~dtfft_abstract_kernel->module~dtfft_interface_nvtx module~dtfft_abstract_kernel->module~dtfft_parameters module~dtfft_abstract_kernel->module~dtfft_utils module~dtfft_abstract_kernel->mpi_f08 module~dtfft_config->iso_c_binding module~dtfft_config->iso_fortran_env module~dtfft_config->module~dtfft_interface_cuda_runtime module~dtfft_config->module~dtfft_parameters module~dtfft_config->module~dtfft_utils module~dtfft_config->mpi_f08 module~dtfft_errors dtfft_errors module~dtfft_config->module~dtfft_errors module~dtfft_interface_cuda->iso_c_binding module~dtfft_interface_cuda->iso_fortran_env module~dtfft_interface_cuda->module~dtfft_parameters module~dtfft_interface_cuda->module~dtfft_utils 
module~dtfft_interface_cuda->module~dtfft_errors module~dtfft_interface_cuda_runtime->iso_c_binding module~dtfft_interface_cuda_runtime->module~dtfft_parameters module~dtfft_interface_cuda_runtime->module~dtfft_utils module~dtfft_interface_nvrtc->iso_c_binding module~dtfft_interface_nvrtc->iso_fortran_env module~dtfft_interface_nvrtc->module~dtfft_utils module~dtfft_interface_nvrtc->module~dtfft_errors module~dtfft_interface_nvtx->iso_c_binding module~dtfft_interface_nvtx->module~dtfft_utils module~dtfft_nvrtc_block_optimizer->iso_fortran_env module~dtfft_nvrtc_block_optimizer->module~dtfft_abstract_kernel module~dtfft_nvrtc_block_optimizer->module~dtfft_config module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_block_optimizer->module~dtfft_parameters module~dtfft_nvrtc_block_optimizer->module~dtfft_utils module~dtfft_parameters->iso_c_binding module~dtfft_parameters->iso_fortran_env module~dtfft_parameters->mpi_f08 module~dtfft_utils->iso_c_binding module~dtfft_utils->iso_fortran_env module~dtfft_utils->module~dtfft_parameters module~dtfft_utils->mpi_f08 module~dtfft_utils->module~dtfft_errors module~dtfft_errors->iso_fortran_env

Used by

  • module~~dtfft_nvrtc_module~~UsedByGraph module~dtfft_nvrtc_module dtfft_nvrtc_module module~dtfft_nvrtc_module_cache dtfft_nvrtc_module_cache module~dtfft_nvrtc_module_cache->module~dtfft_nvrtc_module module~dtfft_kernel_device dtfft_kernel_device module~dtfft_kernel_device->module~dtfft_nvrtc_module_cache module~dtfft_transpose_handle_generic dtfft_transpose_handle_generic module~dtfft_transpose_handle_generic->module~dtfft_kernel_device module~dtfft_transpose_plan dtfft_transpose_plan module~dtfft_transpose_plan->module~dtfft_kernel_device module~dtfft_transpose_plan->module~dtfft_transpose_handle_generic module~dtfft_plan dtfft_plan module~dtfft_plan->module~dtfft_transpose_plan module~dtfft dtfft module~dtfft->module~dtfft_plan module~dtfft_api dtfft_api module~dtfft_api->module~dtfft_plan

Variables

Type Visibility Attributes Name Initial
character(len=*), private, parameter :: DEFAULT_KERNEL_NAME = "dtfft_kernel"

Basic kernel name


Derived Types

type, public ::  nvrtc_module

Class for managing nvRTC compiled CUDA kernels

Components

Type Visibility Attributes Name Initial
logical, private :: is_created = .false.

Is module created

character(len=:), private, allocatable :: basic_name

Basic kernel name

integer(kind=int32), private :: ndims

Number of dimensions, used only for forward permutation

type(CUmodule), private :: cumod

CUDA module

type(nvrtcProgram), private :: prog

nvRTC program

type(kernel_type_t), private :: kernel_type

Type of kernel

integer(kind=int64), private :: base_storage

Number of bytes needed to store single element

type(kernel_config), private, allocatable :: configs(:)

Kernel configurations that this module was compiled for

Type-Bound Procedures

procedure, public, pass(self) :: create

Creates module with given parameters

procedure, public, pass(self) :: destroy

Destroys module and frees resources

procedure, public, pass(self) :: get

Returns kernel ready to be executed

generic, public :: check => check_instance, check_module

Checks if kernel with given parameters is available in this module

procedure, private, pass(self) :: check_instance

Checks if kernel with given parameters is available in this module

procedure, private, pass(self) :: check_module

Basic check that this module provides kernels of given type

type, private, extends(string) ::  codegen_t

Class for generating CUDA code

Components

Type Visibility Attributes Name Initial
character(len=:), public, allocatable :: raw

String

Type-Bound Procedures

procedure, public, pass(self) :: destroy => destroy_string
procedure, public, pass(self) :: add => add_line

Adds new line to CUDA code


Functions

private function get(self, ndims, kernel_type, base_storage, tile_size, block_rows) result(fun)

Returns kernel ready to be executed

Arguments

Type IntentOptional Attributes Name
class(nvrtc_module), intent(in) :: self

This module

integer(kind=int32), intent(in) :: ndims

Number of dimensions, used only for forward permutation

type(kernel_type_t), intent(in) :: kernel_type

Type of kernel to build

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

integer(kind=int32), intent(in) :: tile_size

Size of shared memory tile, template parameter

integer(kind=int32), intent(in) :: block_rows

Number of rows processed by single thread, template parameter

Return Value type(CUfunction)

Resulting kernel

private function check_instance(self, ndims, kernel_type, base_storage, tile_size, block_rows)

Checks if kernel with given parameters is available in this module

Arguments

Type IntentOptional Attributes Name
class(nvrtc_module), intent(in) :: self

This module

integer(kind=int32), intent(in) :: ndims

Number of dimensions

type(kernel_type_t), intent(in) :: kernel_type

Type of kernel to build

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

integer(kind=int32), intent(in) :: tile_size

Size of shared memory tile, template parameter

integer(kind=int32), intent(in) :: block_rows

Number of rows processed by single thread, template parameter

Return Value logical

private function check_module(self, ndims, kernel_type, base_storage)

Basic check that this module provides kernels of given type

Arguments

Type IntentOptional Attributes Name
class(nvrtc_module), intent(in) :: self

This module

integer(kind=int32), intent(in) :: ndims

Number of dimensions

type(kernel_type_t), intent(in) :: kernel_type

Type of kernel to build

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

Return Value logical

private function compile_program(code, prog_name, configs, props) result(prog)

Compiles nvRTC program with given configurations

Arguments

Type IntentOptional Attributes Name
type(codegen_t), intent(in) :: code

CUDA code to compile

character(len=*), intent(in) :: prog_name

Basic kernel name

type(kernel_config), intent(in) :: configs(:)

Kernel configurations that this module should be compiled for

type(device_props), intent(in) :: props

GPU architecture properties

Return Value type(nvrtcProgram)

Resulting nvRTC program

private function get_name_expression(basic_name, tile_dim, block_rows, padding) result(expression)

Generates name expression for given template parameters

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: basic_name

Basic kernel name

integer(kind=int32), intent(in) :: tile_dim

Size of shared memory tile, template parameter

integer(kind=int32), intent(in) :: block_rows

Number of rows processed by single thread, template parameter

integer(kind=int32), intent(in) :: padding

Padding to avoid shared memory bank conflicts, template parameter

Return Value character(len=c_char), allocatable, (:)

Resulting name expression

private function get_mangled_name(basic_name, prog, tile_dim, block_rows, padding) result(mangled)

Gets mangled name for given template parameters from nvRTC program

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: basic_name

Basic kernel name

type(nvrtcProgram), intent(in) :: prog

nvRTC program

integer(kind=int32), intent(in) :: tile_dim

Size of shared memory tile, template parameter

integer(kind=int32), intent(in) :: block_rows

Number of rows processed by single thread, template parameter

integer(kind=int32), intent(in) :: padding

Padding to avoid shared memory bank conflicts, template parameter

Return Value type(c_ptr)

Mangled kernel name

private function get_code(kernel_name, ndims, base_storage, kernel_type) result(code)

Generates code that will be used to locally transpose data and prepare it for sending to other processes

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: kernel_name

Name of CUDA kernel

integer(kind=int32), intent(in) :: ndims

Number of dimensions

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(kernel_type_t), intent(in) :: kernel_type

Type of kernel to generate code for

Return Value type(codegen_t)

Resulting code


Subroutines

private subroutine add_line(self, line)

Adds new line to CUDA code

Arguments

Type IntentOptional Attributes Name
class(codegen_t), intent(inout) :: self

Kernel code

character(len=*), intent(in) :: line

Line to add

private subroutine create(self, ndims, kernel_type, base_storage, configs, props)

Creates module with given parameters, compiles nvRTC program and loads it as CUDA module

Arguments

Type IntentOptional Attributes Name
class(nvrtc_module), intent(inout) :: self

This module

integer(kind=int32), intent(in) :: ndims

Number of dimensions, used only for forward permutation

type(kernel_type_t), intent(in) :: kernel_type

Type of kernel to build

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(kernel_config), intent(in) :: configs(:)

Kernel configurations that this module should be compiled for

type(device_props), intent(in) :: props

GPU architecture properties

private subroutine destroy(self)

Destroys module and frees resources

Arguments

Type IntentOptional Attributes Name
class(nvrtc_module), intent(inout) :: self

private subroutine set_name_expression(prog, basic_name, tile_dim, block_rows, padding)

Sets name expression for given template parameters to nvRTC program

Arguments

Type IntentOptional Attributes Name
type(nvrtcProgram), intent(in) :: prog

nvRTC program

character(len=*), intent(in) :: basic_name

Basic kernel name

integer(kind=int32), intent(in) :: tile_dim

Size of shared memory tile, template parameter

integer(kind=int32), intent(in) :: block_rows

Number of rows processed by single thread, template parameter

integer(kind=int32), intent(in) :: padding

Padding to avoid shared memory bank conflicts, template parameter