dtfft_transpose_plan_cuda Module

This module describes the transpose_plan_cuda class.


Uses

  • Dependency graph (UsesGraph) of dtfft_transpose_plan_cuda. Each entry lists the modules that the named module uses directly:
      - dtfft_transpose_plan_cuda -> iso_c_binding, iso_fortran_env, dtfft_abstract_backend, dtfft_abstract_transpose_plan, dtfft_config, dtfft_interface_cuda, dtfft_interface_cuda_runtime, dtfft_interface_nvrtc, dtfft_interface_nvtx, dtfft_nvrtc_kernel, dtfft_parameters, dtfft_pencil, dtfft_transpose_handle_cuda, dtfft_utils, mpi_f08
      - dtfft_abstract_backend -> iso_c_binding, iso_fortran_env, dtfft_interface_cuda_runtime, dtfft_nvrtc_kernel, dtfft_parameters, dtfft_pencil, dtfft_utils, mpi_f08, dtfft_interface_nccl
      - dtfft_abstract_transpose_plan -> iso_c_binding, iso_fortran_env, dtfft_abstract_backend, dtfft_config, dtfft_interface_cuda_runtime, dtfft_interface_nvtx, dtfft_nvrtc_kernel, dtfft_parameters, dtfft_pencil, dtfft_utils, mpi_f08, dtfft_interface_nccl, dtfft_interface_nvshmem
      - dtfft_config -> iso_c_binding, iso_fortran_env, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_utils, mpi_f08
      - dtfft_interface_cuda -> iso_c_binding, iso_fortran_env, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_utils
      - dtfft_interface_cuda_runtime -> iso_c_binding, dtfft_parameters, dtfft_utils
      - dtfft_interface_nvrtc -> iso_c_binding, iso_fortran_env, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_utils
      - dtfft_interface_nvtx -> iso_c_binding, dtfft_utils
      - dtfft_nvrtc_kernel -> iso_c_binding, iso_fortran_env, dtfft_interface_cuda, dtfft_interface_cuda_runtime, dtfft_interface_nvrtc, dtfft_interface_nvtx, dtfft_parameters, dtfft_utils, mpi_f08
      - dtfft_parameters -> iso_c_binding, iso_fortran_env, mpi_f08
      - dtfft_pencil -> iso_c_binding, iso_fortran_env, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_utils, mpi_f08
      - dtfft_transpose_handle_cuda -> iso_c_binding, iso_fortran_env, dtfft_abstract_backend, dtfft_interface_cuda_runtime, dtfft_interface_nvtx, dtfft_nvrtc_kernel, dtfft_parameters, dtfft_pencil, dtfft_utils, mpi_f08, dtfft_backend_cufftmp_m, dtfft_backend_mpi, dtfft_backend_nccl_m
      - dtfft_utils -> iso_c_binding, iso_fortran_env, dtfft_parameters, mpi_f08
      - dtfft_backend_cufftmp_m -> iso_c_binding, iso_fortran_env, dtfft_abstract_backend, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_pencil, dtfft_utils, mpi_f08, dtfft_interface_nvshmem, dtfft_interface_cufft
      - dtfft_backend_mpi -> iso_c_binding, iso_fortran_env, dtfft_abstract_backend, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_utils, mpi_f08
      - dtfft_backend_nccl_m -> iso_c_binding, iso_fortran_env, dtfft_abstract_backend, dtfft_interface_cuda_runtime, dtfft_parameters, dtfft_utils, mpi_f08, dtfft_interface_nccl
      - dtfft_interface_nccl -> iso_c_binding, dtfft_parameters, dtfft_utils
      - dtfft_interface_nvshmem -> iso_c_binding, iso_fortran_env, dtfft_parameters, dtfft_utils
      - dtfft_interface_cufft -> iso_c_binding, iso_fortran_env, dtfft_parameters, dtfft_utils

Used by

  • Used-by graph (UsedByGraph) of dtfft_transpose_plan_cuda. Each entry lists a module and the module it uses:
      - dtfft_plan -> dtfft_transpose_plan_cuda
      - dtfft -> dtfft_plan
      - dtfft_api -> dtfft_plan

Variables

Type Visibility Attributes Name Initial
real(kind=real32), private, parameter :: MaxR4P = huge(1._real32)

Largest representable value of kind real32


Derived Types

type, public, extends(abstract_transpose_plan) ::  transpose_plan_cuda

CUDA Transpose Plan

Components

Type Visibility Attributes Name Initial
type(dtfft_backend_t), public :: backend = DTFFT_BACKEND_MPI_DATATYPE

GPU backend

type(backend_helper), public :: helper

Backend helper

logical, public :: is_z_slab

Z-slab optimization flag (for 3D transforms)

integer(kind=int64), public :: min_buffer_size

Minimal buffer size for transposition

type(dtfft_stream_t), private :: stream

CUDA stream

type(c_ptr), private :: aux

Auxiliary memory

real(kind=real32), private, pointer :: paux(:)

Pointer to auxiliary memory

logical, private :: is_aux_alloc

Is auxiliary memory allocated

type(transpose_handle_cuda), private, allocatable :: fplans(:)

Forward transposition plans

type(transpose_handle_cuda), private, allocatable :: bplans(:)

Backward transposition plans

Type-Bound Procedures

procedure, public, pass(self) :: create

Create transposition plan

procedure, public, pass(self) :: execute

Executes transposition

procedure, public, non_overridable, pass(self) :: get_backend

Returns backend id

procedure, public, non_overridable, pass(self) :: mem_alloc

Allocates memory based on selected backend

procedure, public, non_overridable, pass(self) :: mem_free

Frees memory allocated with mem_alloc

procedure, public :: create_private => create_cuda

Creates CUDA transpose plan

procedure, public :: execute_private => execute_cuda

Executes single transposition

procedure, public :: destroy => destroy_cuda

Destroys CUDA transpose plan


Functions

private function create_cuda(self, dims, transposed_dims, base_comm, comm_dims, effort, base_dtype, base_storage, is_custom_cart_comm, cart_comm, comms, pencils)

Creates CUDA transpose plan

Arguments

Type Intent Optional Attributes Name
class(transpose_plan_cuda), intent(inout) :: self

GPU transpose plan

integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

integer(kind=int32), intent(in) :: transposed_dims(:,:)

Transposed dimensions

type(MPI_Comm), intent(in) :: base_comm

Base communicator

integer(kind=int32), intent(in) :: comm_dims(:)

Number of processors in each dimension

type(dtfft_effort_t), intent(in) :: effort

How thoroughly dtFFT searches for the optimal plan

type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

logical, intent(in) :: is_custom_cart_comm

Whether a custom Cartesian communicator was provided by the user

type(MPI_Comm), intent(out) :: cart_comm

Cartesian communicator

type(MPI_Comm), intent(out) :: comms(:)

Array of 1d communicators

type(pencil), intent(out) :: pencils(:)

Data distribution metadata

Return Value integer(kind=int32)

private function alloc_and_set_aux(helper, backend, cart_comm, aux, paux, plans, bplans) result(is_aux_alloc)

Allocates auxiliary memory according to the backend and sets it to the plans

Arguments

Type Intent Optional Attributes Name
type(backend_helper), intent(inout) :: helper

Backend helper

type(dtfft_backend_t), intent(in) :: backend

GPU backend

type(MPI_Comm), intent(in) :: cart_comm

Cartesian communicator

type(c_ptr), intent(inout) :: aux

Allocatable auxiliary memory

real(kind=real32), intent(inout), pointer :: paux(:)

Pointer to auxiliary memory

type(transpose_handle_cuda), intent(inout) :: plans(:)

Plans

type(transpose_handle_cuda), intent(inout), optional :: bplans(:)

Backward plans

Return Value logical

Is auxiliary memory allocated


Subroutines

private subroutine execute_cuda(self, in, out, transpose_type)

Executes single transposition

Arguments

Type Intent Optional Attributes Name
class(transpose_plan_cuda), intent(inout) :: self

Transposition class

real(kind=real32), intent(inout) :: in(:)

Incoming buffer

real(kind=real32), intent(inout) :: out(:)

Resulting buffer

type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transpose to execute

private subroutine destroy_cuda(self)

Destroys transposition plans

Arguments

Type Intent Optional Attributes Name
class(transpose_plan_cuda), intent(inout) :: self

Transposition class

private subroutine autotune_grid_decomposition(dims, transposed_dims, base_comm, base_storage, stream, best_decomposition, backend, min_execution_time, best_backend)

Runs through all possible grid decompositions and selects the best one based on the lowest average execution time

Arguments

Type Intent Optional Attributes Name
integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

integer(kind=int32), intent(in) :: transposed_dims(:,:)

Transposed dimensions

type(MPI_Comm), intent(in) :: base_comm

3D comm

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

integer(kind=int32), intent(out) :: best_decomposition(:)

Best decomposition found

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: min_execution_time

Elapsed time for best plan selected

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected

private subroutine autotune_grid(dims, transposed_dims, base_comm, comm_dims, base_storage, is_z_slab, stream, backend, best_time, best_backend)

Creates cartesian grid and runs various backends on it. Can return best backend and execution time

Arguments

Type Intent Optional Attributes Name
integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

integer(kind=int32), intent(in) :: transposed_dims(:,:)

Transposed dimensions

type(MPI_Comm), intent(in) :: base_comm

Basic communicator to create 3d grid from

integer(kind=int32), intent(in) :: comm_dims(:)

Number of processors in each dimension

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

logical, intent(in) :: is_z_slab

Is Z-slab optimization enabled

type(dtfft_stream_t), intent(in) :: stream

Stream to use

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: best_time

Elapsed time for best plan selected

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected for the grid

private subroutine run_autotune_backend(comms, cart_comm, pencils, base_storage, stream, is_z_slab, backend, best_time, best_backend)

Runs autotune for all backends

Arguments

Type Intent Optional Attributes Name
type(MPI_Comm), intent(in) :: comms(:)

1D communicators

type(MPI_Comm), intent(in) :: cart_comm

3D Cartesian communicator

type(pencil), intent(in) :: pencils(:)

Source metadata

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

logical, intent(in) :: is_z_slab

Is Z-slab optimization enabled

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: best_time

Elapsed time for best backend

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected