dtfft_transpose_handle_generic Module

This module describes the transpose_handle_generic class. It is responsible for managing both Host and CUDA-based transposition operations. It executes transpose kernels, memory transfers between GPUs/Hosts, and data unpacking if required.


Uses

  • module~~dtfft_transpose_handle_generic~~UsesGraph module~dtfft_transpose_handle_generic dtfft_transpose_handle_generic iso_c_binding iso_c_binding module~dtfft_transpose_handle_generic->iso_c_binding iso_fortran_env iso_fortran_env module~dtfft_transpose_handle_generic->iso_fortran_env module~dtfft_abstract_backend dtfft_abstract_backend module~dtfft_transpose_handle_generic->module~dtfft_abstract_backend module~dtfft_abstract_kernel dtfft_abstract_kernel module~dtfft_transpose_handle_generic->module~dtfft_abstract_kernel module~dtfft_abstract_transpose_handle dtfft_abstract_transpose_handle module~dtfft_transpose_handle_generic->module~dtfft_abstract_transpose_handle module~dtfft_backend_cufftmp_m dtfft_backend_cufftmp_m module~dtfft_transpose_handle_generic->module~dtfft_backend_cufftmp_m module~dtfft_backend_mpi dtfft_backend_mpi module~dtfft_transpose_handle_generic->module~dtfft_backend_mpi module~dtfft_backend_nccl_m dtfft_backend_nccl_m module~dtfft_transpose_handle_generic->module~dtfft_backend_nccl_m module~dtfft_errors dtfft_errors module~dtfft_transpose_handle_generic->module~dtfft_errors module~dtfft_kernel_device dtfft_kernel_device module~dtfft_transpose_handle_generic->module~dtfft_kernel_device module~dtfft_kernel_host dtfft_kernel_host module~dtfft_transpose_handle_generic->module~dtfft_kernel_host module~dtfft_parameters dtfft_parameters module~dtfft_transpose_handle_generic->module~dtfft_parameters module~dtfft_pencil dtfft_pencil module~dtfft_transpose_handle_generic->module~dtfft_pencil module~dtfft_utils dtfft_utils module~dtfft_transpose_handle_generic->module~dtfft_utils mpi_f08 mpi_f08 module~dtfft_transpose_handle_generic->mpi_f08 module~dtfft_abstract_backend->iso_c_binding module~dtfft_abstract_backend->iso_fortran_env module~dtfft_abstract_backend->module~dtfft_abstract_kernel module~dtfft_abstract_backend->module~dtfft_errors module~dtfft_abstract_backend->module~dtfft_parameters module~dtfft_abstract_backend->module~dtfft_pencil 
module~dtfft_abstract_backend->module~dtfft_utils module~dtfft_abstract_backend->mpi_f08 module~dtfft_config dtfft_config module~dtfft_abstract_backend->module~dtfft_config module~dtfft_interface_cuda_runtime dtfft_interface_cuda_runtime module~dtfft_abstract_backend->module~dtfft_interface_cuda_runtime module~dtfft_interface_nccl dtfft_interface_nccl module~dtfft_abstract_backend->module~dtfft_interface_nccl module~dtfft_abstract_kernel->iso_fortran_env module~dtfft_abstract_kernel->module~dtfft_parameters module~dtfft_abstract_kernel->module~dtfft_utils module~dtfft_abstract_kernel->mpi_f08 module~dtfft_interface_nvtx dtfft_interface_nvtx module~dtfft_abstract_kernel->module~dtfft_interface_nvtx module~dtfft_abstract_transpose_handle->iso_fortran_env module~dtfft_abstract_transpose_handle->module~dtfft_abstract_backend module~dtfft_abstract_transpose_handle->module~dtfft_parameters module~dtfft_abstract_transpose_handle->module~dtfft_pencil module~dtfft_abstract_transpose_handle->mpi_f08 module~dtfft_backend_cufftmp_m->iso_c_binding module~dtfft_backend_cufftmp_m->iso_fortran_env module~dtfft_backend_cufftmp_m->module~dtfft_abstract_backend module~dtfft_backend_cufftmp_m->module~dtfft_errors module~dtfft_backend_cufftmp_m->module~dtfft_parameters module~dtfft_backend_cufftmp_m->module~dtfft_pencil module~dtfft_backend_cufftmp_m->module~dtfft_utils module~dtfft_backend_cufftmp_m->mpi_f08 module~dtfft_backend_cufftmp_m->module~dtfft_interface_cuda_runtime module~dtfft_interface_cufft dtfft_interface_cufft module~dtfft_backend_cufftmp_m->module~dtfft_interface_cufft module~dtfft_interface_nvshmem dtfft_interface_nvshmem module~dtfft_backend_cufftmp_m->module~dtfft_interface_nvshmem module~dtfft_backend_mpi->iso_c_binding module~dtfft_backend_mpi->iso_fortran_env module~dtfft_backend_mpi->module~dtfft_abstract_backend module~dtfft_backend_mpi->module~dtfft_errors module~dtfft_backend_mpi->module~dtfft_parameters module~dtfft_backend_mpi->module~dtfft_utils 
module~dtfft_backend_mpi->mpi_f08 module~dtfft_backend_mpi->module~dtfft_interface_cuda_runtime module~dtfft_backend_mpi->module~dtfft_interface_nvtx module~dtfft_backend_nccl_m->iso_c_binding module~dtfft_backend_nccl_m->iso_fortran_env module~dtfft_backend_nccl_m->module~dtfft_abstract_backend module~dtfft_backend_nccl_m->module~dtfft_errors module~dtfft_backend_nccl_m->module~dtfft_parameters module~dtfft_backend_nccl_m->module~dtfft_utils module~dtfft_backend_nccl_m->mpi_f08 module~dtfft_backend_nccl_m->module~dtfft_interface_cuda_runtime module~dtfft_backend_nccl_m->module~dtfft_interface_nccl module~dtfft_errors->iso_fortran_env module~dtfft_kernel_device->iso_c_binding module~dtfft_kernel_device->iso_fortran_env module~dtfft_kernel_device->module~dtfft_abstract_kernel module~dtfft_kernel_device->module~dtfft_parameters module~dtfft_kernel_device->module~dtfft_utils module~dtfft_kernel_device->mpi_f08 module~dtfft_kernel_device->module~dtfft_config module~dtfft_interface_cuda dtfft_interface_cuda module~dtfft_kernel_device->module~dtfft_interface_cuda module~dtfft_kernel_device->module~dtfft_interface_cuda_runtime module~dtfft_kernel_device->module~dtfft_interface_nvtx module~dtfft_nvrtc_block_optimizer dtfft_nvrtc_block_optimizer module~dtfft_kernel_device->module~dtfft_nvrtc_block_optimizer module~dtfft_nvrtc_module_cache dtfft_nvrtc_module_cache module~dtfft_kernel_device->module~dtfft_nvrtc_module_cache module~dtfft_kernel_host->iso_c_binding module~dtfft_kernel_host->iso_fortran_env module~dtfft_kernel_host->module~dtfft_abstract_kernel module~dtfft_kernel_host->module~dtfft_parameters module~dtfft_kernel_host->module~dtfft_utils module~dtfft_kernel_host->mpi_f08 module~dtfft_kernel_host->module~dtfft_config module~dtfft_kernel_host->module~dtfft_interface_nvtx module~dtfft_parameters->iso_c_binding module~dtfft_parameters->iso_fortran_env module~dtfft_parameters->mpi_f08 module~dtfft_pencil->iso_c_binding module~dtfft_pencil->iso_fortran_env 
module~dtfft_pencil->module~dtfft_errors module~dtfft_pencil->module~dtfft_parameters module~dtfft_pencil->module~dtfft_utils module~dtfft_pencil->mpi_f08 module~dtfft_pencil->module~dtfft_interface_cuda_runtime module~dtfft_utils->iso_c_binding module~dtfft_utils->iso_fortran_env module~dtfft_utils->module~dtfft_errors module~dtfft_utils->module~dtfft_parameters module~dtfft_utils->mpi_f08 module~dtfft_config->iso_c_binding module~dtfft_config->iso_fortran_env module~dtfft_config->module~dtfft_errors module~dtfft_config->module~dtfft_parameters module~dtfft_config->module~dtfft_utils module~dtfft_config->mpi_f08 module~dtfft_config->module~dtfft_interface_cuda_runtime module~dtfft_interface_cuda->iso_c_binding module~dtfft_interface_cuda->iso_fortran_env module~dtfft_interface_cuda->module~dtfft_errors module~dtfft_interface_cuda->module~dtfft_parameters module~dtfft_interface_cuda->module~dtfft_utils module~dtfft_interface_cuda_runtime->iso_c_binding module~dtfft_interface_cuda_runtime->module~dtfft_parameters module~dtfft_interface_cuda_runtime->module~dtfft_utils module~dtfft_interface_cufft->iso_c_binding module~dtfft_interface_cufft->iso_fortran_env module~dtfft_interface_cufft->module~dtfft_parameters module~dtfft_interface_cufft->module~dtfft_utils module~dtfft_interface_nccl->iso_c_binding module~dtfft_interface_nccl->module~dtfft_parameters module~dtfft_interface_nccl->module~dtfft_utils module~dtfft_interface_nvshmem->iso_c_binding module~dtfft_interface_nvshmem->iso_fortran_env module~dtfft_interface_nvshmem->module~dtfft_parameters module~dtfft_interface_nvshmem->module~dtfft_utils module~dtfft_interface_nvshmem->mpi_f08 module~dtfft_interface_nvtx->iso_c_binding module~dtfft_interface_nvtx->module~dtfft_utils module~dtfft_nvrtc_block_optimizer->iso_fortran_env module~dtfft_nvrtc_block_optimizer->module~dtfft_abstract_kernel module~dtfft_nvrtc_block_optimizer->module~dtfft_parameters module~dtfft_nvrtc_block_optimizer->module~dtfft_utils 
module~dtfft_nvrtc_block_optimizer->module~dtfft_config module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_module_cache->iso_c_binding module~dtfft_nvrtc_module_cache->iso_fortran_env module~dtfft_nvrtc_module_cache->module~dtfft_abstract_kernel module~dtfft_nvrtc_module_cache->module~dtfft_utils module~dtfft_nvrtc_module_cache->module~dtfft_config module~dtfft_nvrtc_module_cache->module~dtfft_interface_cuda module~dtfft_nvrtc_module_cache->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_module_cache->module~dtfft_nvrtc_block_optimizer module~dtfft_nvrtc_module dtfft_nvrtc_module module~dtfft_nvrtc_module_cache->module~dtfft_nvrtc_module module~dtfft_nvrtc_module->iso_c_binding module~dtfft_nvrtc_module->iso_fortran_env module~dtfft_nvrtc_module->module~dtfft_abstract_kernel module~dtfft_nvrtc_module->module~dtfft_parameters module~dtfft_nvrtc_module->module~dtfft_utils module~dtfft_nvrtc_module->mpi_f08 module~dtfft_nvrtc_module->module~dtfft_config module~dtfft_nvrtc_module->module~dtfft_interface_cuda module~dtfft_nvrtc_module->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_module->module~dtfft_interface_nvtx module~dtfft_nvrtc_module->module~dtfft_nvrtc_block_optimizer module~dtfft_interface_nvrtc dtfft_interface_nvrtc module~dtfft_nvrtc_module->module~dtfft_interface_nvrtc module~dtfft_interface_nvrtc->iso_c_binding module~dtfft_interface_nvrtc->iso_fortran_env module~dtfft_interface_nvrtc->module~dtfft_errors module~dtfft_interface_nvrtc->module~dtfft_utils

Used by

  • module~~dtfft_transpose_handle_generic~~UsedByGraph module~dtfft_transpose_handle_generic dtfft_transpose_handle_generic module~dtfft_transpose_plan dtfft_transpose_plan module~dtfft_transpose_plan->module~dtfft_transpose_handle_generic module~dtfft_plan dtfft_plan module~dtfft_plan->module~dtfft_transpose_plan module~dtfft dtfft module~dtfft->module~dtfft_plan module~dtfft_api dtfft_api module~dtfft_api->module~dtfft_plan

Derived Types

Generic Transpose Handle. Executes transposition in 3 steps:

Read more…

Components

Type Visibility Attributes Name Initial
logical, private :: has_exchange = .false.

If current handle has exchanges between GPUs

logical, private :: is_pipelined = .false.

If underlying exchanges are pipelined

logical, private :: is_async_supported = .false.

If the underlying backend supports async execution (execute/execute_end)

class(abstract_kernel), private, allocatable :: transpose_kernel

Kernel for data transposition

class(abstract_kernel), private, allocatable :: unpack_kernel

Kernel for unpacking data

class(abstract_backend), private, allocatable :: comm_handle

Communication handle

Type-Bound Procedures

procedure, public, non_overridable, pass(self) :: create ../../

Creates transpose handle

procedure, public, pass(self) :: create_private => create ../../

Creates Generic Transpose Handle

procedure, public, pass(self) :: execute ../../

Executes transpose - exchange - unpack

procedure, public, pass(self) :: execute_end ../../

Finalizes async transpose

procedure, public, pass(self) :: get_async_active ../../

Returns whether async transpose is active

procedure, public, pass(self) :: destroy ../../

Destroys Generic Transpose Handle

procedure, public, pass(self) :: get_aux_size ../../

Returns number of bytes required by aux buffer

type, private ::  data_handle

Helper class used to obtain displacements and counts needed to send to other processes

Components

Type Visibility Attributes Name Initial
integer(kind=int32), public, allocatable :: ls(:,:)

Starts of my data that I should send or recv while communicating with other processes

integer(kind=int32), public, allocatable :: ln(:,:)

Counts of my data that I should send or recv while communicating with other processes

integer(kind=int32), public, allocatable :: sizes(:,:)

Counts of every rank in a comm

integer(kind=int32), public, allocatable :: starts(:,:)

Starts of every rank in a comm

integer(kind=int32), public, allocatable :: displs(:)

Local buffer displacement

integer(kind=int32), public, allocatable :: counts(:)

Number of elements to send or recv

Type-Bound Procedures

procedure, public, pass(self) :: create => create_data_handle ../../

Creates handle

procedure, public, pass(self) :: destroy => destroy_data_handle ../../

Destroys handle


Functions

private elemental function get_async_active(self)

Arguments

Type IntentOptional Attributes Name
class(transpose_handle_generic), intent(in) :: self

Generic Transpose Handle

Return Value logical

private pure function get_aux_size(self)

Returns number of bytes required by aux buffer

Arguments

Type IntentOptional Attributes Name
class(transpose_handle_generic), intent(in) :: self

Generic Transpose Handle

Return Value integer(kind=int64)


Subroutines

private subroutine create_data_handle(self, info, comm, comm_size)

Creates handle

Arguments

Type IntentOptional Attributes Name
class(data_handle), intent(inout) :: self

Helper class

type(pencil), intent(in) :: info

Pencil info

type(MPI_Comm), intent(in) :: comm

MPI communicator

integer(kind=int32), intent(in) :: comm_size

Size of comm

private subroutine destroy_data_handle(self)

Destroys handle

Arguments

Type IntentOptional Attributes Name
class(data_handle), intent(inout) :: self

Helper class

private subroutine check_if_overflow(sizes)

Checks if product of sizes fits into integer(int32)

Arguments

Type IntentOptional Attributes Name
integer(kind=int32), intent(in) :: sizes(:)

Sizes to check

private subroutine create(self, comm, send, recv, transpose_type, base_storage, kwargs)

Creates Generic Transpose Handle

Arguments

Type IntentOptional Attributes Name
class(transpose_handle_generic), intent(inout) :: self

Generic Transpose Handle

type(MPI_Comm), intent(in) :: comm

MPI Communicator

type(pencil), intent(in) :: send

Send pencil

type(pencil), intent(in) :: recv

Recv pencil

type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transpose to create

integer(kind=int64), intent(in) :: base_storage

Base storage

type(create_args), intent(in) :: kwargs

Additional arguments

private subroutine execute(self, in, out, kwargs, error_code)

Executes transpose - exchange - unpack

Arguments

Type IntentOptional Attributes Name
class(transpose_handle_generic), intent(inout) :: self

Generic Transpose Handle

real(kind=real32), intent(inout) :: in(:)

Send pointer

real(kind=real32), intent(inout) :: out(:)

Recv pointer

type(execute_args), intent(inout) :: kwargs

Additional arguments

integer(kind=int32), intent(out) :: error_code

Error code

private subroutine execute_end(self, kwargs, error_code)

Ends execution of transposition

Arguments

Type IntentOptional Attributes Name
class(transpose_handle_generic), intent(inout) :: self

Generic Transpose Handle

type(execute_args), intent(inout) :: kwargs

Additional arguments

integer(kind=int32), intent(out) :: error_code

Error code

private subroutine destroy(self)

Destroys Generic Transpose Handle

Arguments

Type IntentOptional Attributes Name
class(transpose_handle_generic), intent(inout) :: self

Generic Transpose Handle