dtfft_nvrtc_kernel_generator Module


Uses

  • module~~dtfft_nvrtc_kernel_generator~~UsesGraph module~dtfft_nvrtc_kernel_generator dtfft_nvrtc_kernel_generator iso_c_binding iso_c_binding module~dtfft_nvrtc_kernel_generator->iso_c_binding iso_fortran_env iso_fortran_env module~dtfft_nvrtc_kernel_generator->iso_fortran_env module~dtfft_interface_cuda dtfft_interface_cuda module~dtfft_nvrtc_kernel_generator->module~dtfft_interface_cuda module~dtfft_interface_cuda_runtime dtfft_interface_cuda_runtime module~dtfft_nvrtc_kernel_generator->module~dtfft_interface_cuda_runtime module~dtfft_interface_nvrtc dtfft_interface_nvrtc module~dtfft_nvrtc_kernel_generator->module~dtfft_interface_nvrtc module~dtfft_nvrtc_block_optimizer dtfft_nvrtc_block_optimizer module~dtfft_nvrtc_kernel_generator->module~dtfft_nvrtc_block_optimizer module~dtfft_parameters dtfft_parameters module~dtfft_nvrtc_kernel_generator->module~dtfft_parameters module~dtfft_utils dtfft_utils module~dtfft_nvrtc_kernel_generator->module~dtfft_utils mpi_f08 mpi_f08 module~dtfft_nvrtc_kernel_generator->mpi_f08 module~dtfft_interface_cuda->iso_c_binding module~dtfft_interface_cuda->iso_fortran_env module~dtfft_interface_cuda->module~dtfft_parameters module~dtfft_interface_cuda->module~dtfft_utils module~dtfft_errors dtfft_errors module~dtfft_interface_cuda->module~dtfft_errors module~dtfft_interface_cuda_runtime->iso_c_binding module~dtfft_interface_cuda_runtime->module~dtfft_parameters module~dtfft_interface_cuda_runtime->module~dtfft_utils module~dtfft_interface_nvrtc->iso_c_binding module~dtfft_interface_nvrtc->iso_fortran_env module~dtfft_interface_nvrtc->module~dtfft_utils module~dtfft_interface_nvrtc->module~dtfft_errors module~dtfft_nvrtc_block_optimizer->iso_fortran_env module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_block_optimizer->module~dtfft_parameters module~dtfft_nvrtc_block_optimizer->module~dtfft_utils module~dtfft_config dtfft_config 
module~dtfft_nvrtc_block_optimizer->module~dtfft_config module~dtfft_parameters->iso_c_binding module~dtfft_parameters->iso_fortran_env module~dtfft_parameters->mpi_f08 module~dtfft_utils->iso_c_binding module~dtfft_utils->iso_fortran_env module~dtfft_utils->module~dtfft_parameters module~dtfft_utils->mpi_f08 module~dtfft_utils->module~dtfft_errors module~dtfft_config->iso_c_binding module~dtfft_config->iso_fortran_env module~dtfft_config->module~dtfft_interface_cuda_runtime module~dtfft_config->module~dtfft_parameters module~dtfft_config->module~dtfft_utils module~dtfft_config->mpi_f08 module~dtfft_config->module~dtfft_errors module~dtfft_errors->iso_fortran_env

Used by

  • module~~dtfft_nvrtc_kernel_generator~~UsedByGraph module~dtfft_nvrtc_kernel_generator dtfft_nvrtc_kernel_generator module~dtfft_nvrtc_kernel dtfft_nvrtc_kernel module~dtfft_nvrtc_kernel->module~dtfft_nvrtc_kernel_generator module~dtfft_abstract_backend dtfft_abstract_backend module~dtfft_abstract_backend->module~dtfft_nvrtc_kernel module~dtfft_abstract_transpose_plan dtfft_abstract_transpose_plan module~dtfft_abstract_transpose_plan->module~dtfft_nvrtc_kernel module~dtfft_abstract_transpose_plan->module~dtfft_abstract_backend module~dtfft_transpose_handle_cuda dtfft_transpose_handle_cuda module~dtfft_transpose_handle_cuda->module~dtfft_nvrtc_kernel module~dtfft_transpose_handle_cuda->module~dtfft_abstract_backend module~dtfft_backend_cufftmp_m dtfft_backend_cufftmp_m module~dtfft_transpose_handle_cuda->module~dtfft_backend_cufftmp_m module~dtfft_backend_mpi dtfft_backend_mpi module~dtfft_transpose_handle_cuda->module~dtfft_backend_mpi module~dtfft_backend_nccl_m dtfft_backend_nccl_m module~dtfft_transpose_handle_cuda->module~dtfft_backend_nccl_m module~dtfft_backend_cufftmp_m->module~dtfft_abstract_backend module~dtfft_backend_mpi->module~dtfft_abstract_backend module~dtfft_backend_nccl_m->module~dtfft_abstract_backend module~dtfft_plan dtfft_plan module~dtfft_plan->module~dtfft_abstract_transpose_plan module~dtfft_transpose_plan_cuda dtfft_transpose_plan_cuda module~dtfft_plan->module~dtfft_transpose_plan_cuda module~dtfft_transpose_plan_host dtfft_transpose_plan_host module~dtfft_plan->module~dtfft_transpose_plan_host module~dtfft_transpose_plan_cuda->module~dtfft_abstract_backend module~dtfft_transpose_plan_cuda->module~dtfft_abstract_transpose_plan module~dtfft_transpose_plan_cuda->module~dtfft_transpose_handle_cuda module~dtfft_transpose_plan_host->module~dtfft_abstract_transpose_plan module~dtfft dtfft module~dtfft->module~dtfft_plan module~dtfft_api dtfft_api module~dtfft_api->module~dtfft_plan

Derived Types

type, public ::  kernel_codegen

Class to build CUDA kernel code

Components

Type Visibility Attributes Name Initial
character(len=:), public, allocatable :: raw

String that holds CUDA code

Type-Bound Procedures

procedure, public, pass(self) :: to_cstr

Converts Fortran CUDA code to C pointer

procedure, public, pass(self) :: add_line

Adds new line to CUDA code

procedure, public, pass(self) :: destroy => destroy_code

Frees all memory


Functions

public function get_transpose_kernel_code(kernel_name, ndims, base_storage, transpose_type, enable_packing, padding) result(code)

Generates code that will be used to locally transpose data and prepare it for sending to other processes. ndims == 2

Arguments

Type Intent Optional Attributes Name
character(len=*), intent(in) :: kernel_name

Name of CUDA kernel

integer(kind=int8), intent(in) :: ndims

Number of dimensions

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(dtfft_transpose_t), intent(in) :: transpose_type

Transpose id

logical, intent(in) :: enable_packing

Whether data should be manually packed

integer(kind=int32), intent(in) :: padding

Return Value type(kernel_codegen)

Resulting code

public function get_unpack_kernel_code(kernel_name, base_storage, is_partial) result(code)

Generates code that will be used to unpack data when it is received

Arguments

Type Intent Optional Attributes Name
character(len=*), intent(in) :: kernel_name

Name of CUDA kernel

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

logical, intent(in) :: is_partial

Return Value type(kernel_codegen)

Resulting code

public function get_unpack_pipelined_kernel_code(kernel_name, base_storage) result(code)

Generates code that will be used to partially unpack data when it is received from another process

Arguments

Type Intent Optional Attributes Name
character(len=*), intent(in) :: kernel_name

Name of CUDA kernel

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

Return Value type(kernel_codegen)

Resulting code


Subroutines

private subroutine to_cstr(self, c_code)

Converts Fortran CUDA code to C pointer

Arguments

Type Intent Optional Attributes Name
class(kernel_codegen), intent(in) :: self

Kernel code

character(len=c_char), intent(out), allocatable :: c_code(:)

C pointer to code

private subroutine add_line(self, line)

Adds new line to CUDA code

Arguments

Type Intent Optional Attributes Name
class(kernel_codegen), intent(inout) :: self

Kernel code

character(len=*), intent(in) :: line

Line to add

private subroutine destroy_code(self)

Frees all memory

Arguments

Type Intent Optional Attributes Name
class(kernel_codegen), intent(inout) :: self

Kernel code

private subroutine get_neighbor_function_code(code)

Generates a device function that is used to determine the id of the process to which data is being sent, or from which data has been received, based on the local element coordinate

Arguments

Type Intent Optional Attributes Name
type(kernel_codegen), intent(inout) :: code

Resulting code

private subroutine get_code_init(kernel_name, base_storage, code, buffer_type)

Generates basic code that is used in all other kernels

Arguments

Type Intent Optional Attributes Name
character(len=*), intent(in) :: kernel_name

Name of CUDA kernel

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(kernel_codegen), intent(inout) :: code

Resulting code

character(len=:), intent(out), optional, allocatable :: buffer_type

Type of buffer that should be used