dtfft_transpose_plan Module

This module describes the transpose_plan class.


Uses

  • module~~dtfft_transpose_plan~~UsesGraph module~dtfft_transpose_plan dtfft_transpose_plan iso_c_binding iso_c_binding module~dtfft_transpose_plan->iso_c_binding iso_fortran_env iso_fortran_env module~dtfft_transpose_plan->iso_fortran_env module~dtfft_abstract_backend dtfft_abstract_backend module~dtfft_transpose_plan->module~dtfft_abstract_backend module~dtfft_abstract_transpose_handle dtfft_abstract_transpose_handle module~dtfft_transpose_plan->module~dtfft_abstract_transpose_handle module~dtfft_config dtfft_config module~dtfft_transpose_plan->module~dtfft_config module~dtfft_errors dtfft_errors module~dtfft_transpose_plan->module~dtfft_errors module~dtfft_interface_cuda dtfft_interface_cuda module~dtfft_transpose_plan->module~dtfft_interface_cuda module~dtfft_interface_cuda_runtime dtfft_interface_cuda_runtime module~dtfft_transpose_plan->module~dtfft_interface_cuda_runtime module~dtfft_interface_nccl dtfft_interface_nccl module~dtfft_transpose_plan->module~dtfft_interface_nccl module~dtfft_interface_nvrtc dtfft_interface_nvrtc module~dtfft_transpose_plan->module~dtfft_interface_nvrtc module~dtfft_interface_nvshmem dtfft_interface_nvshmem module~dtfft_transpose_plan->module~dtfft_interface_nvshmem module~dtfft_interface_nvtx dtfft_interface_nvtx module~dtfft_transpose_plan->module~dtfft_interface_nvtx module~dtfft_kernel_device dtfft_kernel_device module~dtfft_transpose_plan->module~dtfft_kernel_device module~dtfft_parameters dtfft_parameters module~dtfft_transpose_plan->module~dtfft_parameters module~dtfft_pencil dtfft_pencil module~dtfft_transpose_plan->module~dtfft_pencil module~dtfft_transpose_handle_datatype dtfft_transpose_handle_datatype module~dtfft_transpose_plan->module~dtfft_transpose_handle_datatype module~dtfft_transpose_handle_generic dtfft_transpose_handle_generic module~dtfft_transpose_plan->module~dtfft_transpose_handle_generic module~dtfft_utils dtfft_utils module~dtfft_transpose_plan->module~dtfft_utils mpi_f08 mpi_f08 
module~dtfft_transpose_plan->mpi_f08 module~dtfft_abstract_backend->iso_c_binding module~dtfft_abstract_backend->iso_fortran_env module~dtfft_abstract_backend->module~dtfft_config module~dtfft_abstract_backend->module~dtfft_errors module~dtfft_abstract_backend->module~dtfft_interface_cuda_runtime module~dtfft_abstract_backend->module~dtfft_interface_nccl module~dtfft_abstract_backend->module~dtfft_parameters module~dtfft_abstract_backend->module~dtfft_pencil module~dtfft_abstract_backend->module~dtfft_utils module~dtfft_abstract_backend->mpi_f08 module~dtfft_abstract_kernel dtfft_abstract_kernel module~dtfft_abstract_backend->module~dtfft_abstract_kernel module~dtfft_abstract_transpose_handle->iso_fortran_env module~dtfft_abstract_transpose_handle->module~dtfft_abstract_backend module~dtfft_abstract_transpose_handle->module~dtfft_parameters module~dtfft_abstract_transpose_handle->module~dtfft_pencil module~dtfft_abstract_transpose_handle->mpi_f08 module~dtfft_config->iso_c_binding module~dtfft_config->iso_fortran_env module~dtfft_config->module~dtfft_errors module~dtfft_config->module~dtfft_interface_cuda_runtime module~dtfft_config->module~dtfft_parameters module~dtfft_config->module~dtfft_utils module~dtfft_config->mpi_f08 module~dtfft_errors->iso_fortran_env module~dtfft_interface_cuda->iso_c_binding module~dtfft_interface_cuda->iso_fortran_env module~dtfft_interface_cuda->module~dtfft_errors module~dtfft_interface_cuda->module~dtfft_parameters module~dtfft_interface_cuda->module~dtfft_utils module~dtfft_interface_cuda_runtime->iso_c_binding module~dtfft_interface_cuda_runtime->module~dtfft_parameters module~dtfft_interface_cuda_runtime->module~dtfft_utils module~dtfft_interface_nccl->iso_c_binding module~dtfft_interface_nccl->module~dtfft_parameters module~dtfft_interface_nccl->module~dtfft_utils module~dtfft_interface_nvrtc->iso_c_binding module~dtfft_interface_nvrtc->iso_fortran_env module~dtfft_interface_nvrtc->module~dtfft_errors 
module~dtfft_interface_nvrtc->module~dtfft_utils module~dtfft_interface_nvshmem->iso_c_binding module~dtfft_interface_nvshmem->iso_fortran_env module~dtfft_interface_nvshmem->module~dtfft_parameters module~dtfft_interface_nvshmem->module~dtfft_utils module~dtfft_interface_nvshmem->mpi_f08 module~dtfft_interface_nvtx->iso_c_binding module~dtfft_interface_nvtx->module~dtfft_utils module~dtfft_kernel_device->iso_c_binding module~dtfft_kernel_device->iso_fortran_env module~dtfft_kernel_device->module~dtfft_config module~dtfft_kernel_device->module~dtfft_interface_cuda module~dtfft_kernel_device->module~dtfft_interface_cuda_runtime module~dtfft_kernel_device->module~dtfft_interface_nvtx module~dtfft_kernel_device->module~dtfft_parameters module~dtfft_kernel_device->module~dtfft_utils module~dtfft_kernel_device->mpi_f08 module~dtfft_kernel_device->module~dtfft_abstract_kernel module~dtfft_nvrtc_block_optimizer dtfft_nvrtc_block_optimizer module~dtfft_kernel_device->module~dtfft_nvrtc_block_optimizer module~dtfft_nvrtc_module_cache dtfft_nvrtc_module_cache module~dtfft_kernel_device->module~dtfft_nvrtc_module_cache module~dtfft_parameters->iso_c_binding module~dtfft_parameters->iso_fortran_env module~dtfft_parameters->mpi_f08 module~dtfft_pencil->iso_c_binding module~dtfft_pencil->iso_fortran_env module~dtfft_pencil->module~dtfft_errors module~dtfft_pencil->module~dtfft_interface_cuda_runtime module~dtfft_pencil->module~dtfft_parameters module~dtfft_pencil->module~dtfft_utils module~dtfft_pencil->mpi_f08 module~dtfft_transpose_handle_datatype->iso_fortran_env module~dtfft_transpose_handle_datatype->module~dtfft_abstract_transpose_handle module~dtfft_transpose_handle_datatype->module~dtfft_errors module~dtfft_transpose_handle_datatype->module~dtfft_interface_nvtx module~dtfft_transpose_handle_datatype->module~dtfft_parameters module~dtfft_transpose_handle_datatype->module~dtfft_pencil module~dtfft_transpose_handle_datatype->module~dtfft_utils 
module~dtfft_transpose_handle_datatype->mpi_f08 module~dtfft_transpose_handle_generic->iso_c_binding module~dtfft_transpose_handle_generic->iso_fortran_env module~dtfft_transpose_handle_generic->module~dtfft_abstract_backend module~dtfft_transpose_handle_generic->module~dtfft_abstract_transpose_handle module~dtfft_transpose_handle_generic->module~dtfft_errors module~dtfft_transpose_handle_generic->module~dtfft_kernel_device module~dtfft_transpose_handle_generic->module~dtfft_parameters module~dtfft_transpose_handle_generic->module~dtfft_pencil module~dtfft_transpose_handle_generic->module~dtfft_utils module~dtfft_transpose_handle_generic->mpi_f08 module~dtfft_transpose_handle_generic->module~dtfft_abstract_kernel module~dtfft_backend_cufftmp_m dtfft_backend_cufftmp_m module~dtfft_transpose_handle_generic->module~dtfft_backend_cufftmp_m module~dtfft_backend_mpi dtfft_backend_mpi module~dtfft_transpose_handle_generic->module~dtfft_backend_mpi module~dtfft_backend_nccl_m dtfft_backend_nccl_m module~dtfft_transpose_handle_generic->module~dtfft_backend_nccl_m module~dtfft_kernel_host dtfft_kernel_host module~dtfft_transpose_handle_generic->module~dtfft_kernel_host module~dtfft_utils->iso_c_binding module~dtfft_utils->iso_fortran_env module~dtfft_utils->module~dtfft_errors module~dtfft_utils->module~dtfft_parameters module~dtfft_utils->mpi_f08 module~dtfft_abstract_kernel->iso_fortran_env module~dtfft_abstract_kernel->module~dtfft_interface_nvtx module~dtfft_abstract_kernel->module~dtfft_parameters module~dtfft_abstract_kernel->module~dtfft_utils module~dtfft_abstract_kernel->mpi_f08 module~dtfft_backend_cufftmp_m->iso_c_binding module~dtfft_backend_cufftmp_m->iso_fortran_env module~dtfft_backend_cufftmp_m->module~dtfft_abstract_backend module~dtfft_backend_cufftmp_m->module~dtfft_errors module~dtfft_backend_cufftmp_m->module~dtfft_interface_cuda_runtime module~dtfft_backend_cufftmp_m->module~dtfft_interface_nvshmem module~dtfft_backend_cufftmp_m->module~dtfft_parameters 
module~dtfft_backend_cufftmp_m->module~dtfft_pencil module~dtfft_backend_cufftmp_m->module~dtfft_utils module~dtfft_backend_cufftmp_m->mpi_f08 module~dtfft_interface_cufft dtfft_interface_cufft module~dtfft_backend_cufftmp_m->module~dtfft_interface_cufft module~dtfft_backend_mpi->iso_c_binding module~dtfft_backend_mpi->iso_fortran_env module~dtfft_backend_mpi->module~dtfft_abstract_backend module~dtfft_backend_mpi->module~dtfft_errors module~dtfft_backend_mpi->module~dtfft_interface_cuda_runtime module~dtfft_backend_mpi->module~dtfft_interface_nvtx module~dtfft_backend_mpi->module~dtfft_parameters module~dtfft_backend_mpi->module~dtfft_utils module~dtfft_backend_mpi->mpi_f08 module~dtfft_backend_nccl_m->iso_c_binding module~dtfft_backend_nccl_m->iso_fortran_env module~dtfft_backend_nccl_m->module~dtfft_abstract_backend module~dtfft_backend_nccl_m->module~dtfft_errors module~dtfft_backend_nccl_m->module~dtfft_interface_cuda_runtime module~dtfft_backend_nccl_m->module~dtfft_interface_nccl module~dtfft_backend_nccl_m->module~dtfft_parameters module~dtfft_backend_nccl_m->module~dtfft_utils module~dtfft_backend_nccl_m->mpi_f08 module~dtfft_kernel_host->iso_c_binding module~dtfft_kernel_host->iso_fortran_env module~dtfft_kernel_host->module~dtfft_config module~dtfft_kernel_host->module~dtfft_interface_nvtx module~dtfft_kernel_host->module~dtfft_parameters module~dtfft_kernel_host->module~dtfft_utils module~dtfft_kernel_host->mpi_f08 module~dtfft_kernel_host->module~dtfft_abstract_kernel module~dtfft_nvrtc_block_optimizer->iso_fortran_env module~dtfft_nvrtc_block_optimizer->module~dtfft_config module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda module~dtfft_nvrtc_block_optimizer->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_block_optimizer->module~dtfft_parameters module~dtfft_nvrtc_block_optimizer->module~dtfft_utils module~dtfft_nvrtc_block_optimizer->module~dtfft_abstract_kernel module~dtfft_nvrtc_module_cache->iso_c_binding 
module~dtfft_nvrtc_module_cache->iso_fortran_env module~dtfft_nvrtc_module_cache->module~dtfft_config module~dtfft_nvrtc_module_cache->module~dtfft_interface_cuda module~dtfft_nvrtc_module_cache->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_module_cache->module~dtfft_utils module~dtfft_nvrtc_module_cache->module~dtfft_abstract_kernel module~dtfft_nvrtc_module_cache->module~dtfft_nvrtc_block_optimizer module~dtfft_nvrtc_module dtfft_nvrtc_module module~dtfft_nvrtc_module_cache->module~dtfft_nvrtc_module module~dtfft_interface_cufft->iso_c_binding module~dtfft_interface_cufft->iso_fortran_env module~dtfft_interface_cufft->module~dtfft_parameters module~dtfft_interface_cufft->module~dtfft_utils module~dtfft_nvrtc_module->iso_c_binding module~dtfft_nvrtc_module->iso_fortran_env module~dtfft_nvrtc_module->module~dtfft_config module~dtfft_nvrtc_module->module~dtfft_interface_cuda module~dtfft_nvrtc_module->module~dtfft_interface_cuda_runtime module~dtfft_nvrtc_module->module~dtfft_interface_nvrtc module~dtfft_nvrtc_module->module~dtfft_interface_nvtx module~dtfft_nvrtc_module->module~dtfft_parameters module~dtfft_nvrtc_module->module~dtfft_utils module~dtfft_nvrtc_module->mpi_f08 module~dtfft_nvrtc_module->module~dtfft_abstract_kernel module~dtfft_nvrtc_module->module~dtfft_nvrtc_block_optimizer

Used by

  • module~~dtfft_transpose_plan~~UsedByGraph module~dtfft_transpose_plan dtfft_transpose_plan module~dtfft_plan dtfft_plan module~dtfft_plan->module~dtfft_transpose_plan module~dtfft dtfft module~dtfft->module~dtfft_plan module~dtfft_api dtfft_api module~dtfft_api->module~dtfft_plan

Variables

Type Visibility Attributes Name Initial
integer(kind=int8), private, save :: FORWARD_PLAN_IDS(3)

Default data types for forward transpositions

integer(kind=int8), private, save :: BACKWARD_PLAN_IDS(3)

Default data types for backward transpositions

logical, private, save :: ARE_DATATYPES_SET = .false.

Are default data types set


Derived Types

type, public ::  transpose_plan

Transpose Plan class. This class is a container for transposition plans.

Components

Type Visibility Attributes Name Initial
type(dtfft_backend_t), private :: backend

Backend

type(backend_helper), private :: helper

Backend helper

logical, private :: is_z_slab

Z-slab optimization flag (for 3D transforms)

integer(kind=int64), private :: min_buffer_size

Minimal buffer size for transposition

type(dtfft_platform_t), private :: platform

Platform used for transposition

type(dtfft_stream_t), private :: stream

CUDA stream

type(c_ptr), private :: aux

Auxiliary memory

real(kind=real32), private, pointer :: paux(:)

Pointer to auxiliary memory

logical, private :: is_aux_alloc = .false.

Is auxiliary memory allocated

type(plan_t), private, allocatable :: plans(:)

Plans for each transposition

Type-Bound Procedures

procedure, public, non_overridable, pass(self) :: create

Creates transpose plan

procedure, public, non_overridable, pass(self) :: execute

Executes transposition

procedure, public, non_overridable, pass(self) :: execute_end

Finishes asynchronous transposition

procedure, public, non_overridable, pass(self) :: get_async_active

Returns .true. if any of the plans is running asynchronously

procedure, public, non_overridable, pass(self) :: destroy

Destroys transpose plan

procedure, public, non_overridable, pass(self) :: get_aux_size

Returns auxiliary buffer size

procedure, public, non_overridable, pass(self) :: get_backend

Returns backend id

procedure, public, non_overridable, pass(self) :: get_z_slab

Returns .true. if Z-slab optimization is enabled

procedure, public, non_overridable, pass(self) :: mem_alloc

Allocates memory based on selected backend

procedure, public, non_overridable, pass(self) :: mem_free

Frees memory allocated with mem_alloc

type, private ::  plan_t

This type is a container for allocatable transpose handles

Components

Type Visibility Attributes Name Initial
class(abstract_transpose_handle), public, allocatable :: p

Transpose handle


Functions

private function create(self, platform, dims, base_comm, effort, base_dtype, base_storage, cart_comm, comms, pencils, ipencil) result(error_code)

Creates transposition plan

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(inout) :: self

Transposition class

type(dtfft_platform_t), intent(in) :: platform

Platform to create plan for

integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

type(MPI_Comm), intent(in) :: base_comm

Base communicator

type(dtfft_effort_t), intent(in) :: effort

dtFFT planner type of effort

type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(MPI_Comm), intent(out) :: cart_comm

Cartesian communicator

type(MPI_Comm), intent(out) :: comms(:)

Array of 1d communicators

type(pencil), intent(out) :: pencils(:)

Data distributing meta

type(pencil_init), intent(in), optional :: ipencil

Pencil passed by user

Return Value integer(kind=int32)

Error code

private function get_async_active(self)

Returns .true. if any of the plans is running asynchronously

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(in) :: self

Transposition class

Return Value logical

private function get_z_slab(self)

Returns .true. if Z-slab optimization is enabled

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(in) :: self

Transposition class

Return Value logical

private function autotune_transpose_id(helper, from, to, base_dtype, base_storage, transpose_name_id, a, b, forward_id, backward_id) result(elapsed_time)

Creates forward and backward transpose plans for backend DTFFT_BACKEND_MPI_DATATYPE based on source and target data distributions, and executes them DTFFT_MEASURE_ITERS times (4 * DTFFT_MEASURE_ITERS iterations total), plus 4 * DTFFT_MEASURE_WARMUP_ITERS warmup iterations.

Read more…

Arguments

Type IntentOptional Attributes Name
type(backend_helper), intent(inout) :: helper

Backend helper

type(pencil), intent(in) :: from

Source meta

type(pencil), intent(in) :: to

Target meta

type(MPI_Datatype), intent(in) :: base_dtype

Basic MPI Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store Basic MPI Datatype

integer(kind=int8), intent(in) :: transpose_name_id

ID of transpose name (from -3 to 3, except 0)

real(kind=real32), intent(inout) :: a(:)

Source buffer

real(kind=real32), intent(inout) :: b(:)

Target buffer

integer(kind=int8), intent(out) :: forward_id

Best forward plan ID

integer(kind=int8), intent(out) :: backward_id

Best backward plan ID

Return Value real(kind=real32)

Elapsed time for best plans selected

private function get_plan_execution_time(helper, from, to, base_dtype, base_storage, datatype_id, transpose_name_id, a, b) result(elapsed_time)

Creates transpose plan for backend DTFFT_BACKEND_MPI_DATATYPE and executes it DTFFT_MEASURE_WARMUP_ITERS + DTFFT_MEASURE_ITERS times

Read more…

Arguments

Type IntentOptional Attributes Name
type(backend_helper), intent(inout) :: helper

Backend helper

type(pencil), intent(in) :: from

Source meta

type(pencil), intent(in) :: to

Target meta

type(MPI_Datatype), intent(in) :: base_dtype

Basic MPI Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store Basic MPI Datatype

integer(kind=int8), intent(in) :: datatype_id

ID of transpose (1 or 2)

integer(kind=int8), intent(in) :: transpose_name_id

ID of transpose name (from -3 to 3, except 0)

real(kind=real32), intent(inout) :: a(:)

Source buffer

real(kind=real32), intent(inout) :: b(:)

Target buffer

Return Value real(kind=real32)

Execution time [ms]

private function report_timings(comm, elapsed_time, n_iters, space_count) result(max_time)

Arguments

Type IntentOptional Attributes Name
type(MPI_Comm), intent(in) :: comm
real(kind=real32), intent(in) :: elapsed_time
integer(kind=int32), intent(in) :: n_iters
integer(kind=int32), intent(in), optional :: space_count

Return Value real(kind=real32)

private function get_aux_size(self) result(aux_size)

Returns maximum auxiliary memory size needed by transpose plan

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(in) :: self

Transposition class

Return Value integer(kind=int64)

private function get_aux_size_generic(plans) result(aux_size)

Returns maximum auxiliary memory size needed by plans

Arguments

Type IntentOptional Attributes Name
type(plan_t), intent(in) :: plans(:)

Transpose plans

Return Value integer(kind=int64)

Maximum auxiliary memory size needed

private function get_backend(self)

Returns plan GPU backend

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(in) :: self

Transposition class

Return Value type(dtfft_backend_t)


Subroutines

private subroutine execute(self, in, out, transpose_type, exec_type, error_code)

Executes transposition

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(inout) :: self

Transposition class

type(c_ptr), intent(in) :: in

Incoming buffer

type(c_ptr), intent(in) :: out

Resulting buffer

type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transpose to execute

type(async_exec_t), intent(in) :: exec_type

Type of execution (sync/async)

integer(kind=int32), intent(out), optional :: error_code

Error code

private subroutine execute_end(self, in, out, transpose_type, error_code)

Finishes asynchronous transposition

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(inout) :: self

Transposition class

type(c_ptr), intent(in) :: in

Incoming buffer

type(c_ptr), intent(in) :: out

Resulting buffer

type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transpose

integer(kind=int32), intent(out) :: error_code

Error code

private subroutine destroy(self)

Destroys transposition plans

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(inout) :: self

Transposition class

private subroutine allocate_plans(plans, backend)

Allocates array of plans

Arguments

Type IntentOptional Attributes Name
type(plan_t), intent(inout) :: plans(:)

Plans to allocate

type(dtfft_backend_t), intent(in) :: backend

Backend to use

private subroutine destroy_plans(plans)

Destroys array of plans

Arguments

Type IntentOptional Attributes Name
type(plan_t), intent(inout) :: plans(:)

Plans to destroy

private subroutine autotune_grid_decomposition(platform, dims, transposed_dims, base_comm, effort, base_dtype, base_storage, stream, best_forward_ids, best_backward_ids, best_decomposition, backend, min_execution_time, best_backend)

Runs through all possible grid decompositions and selects the best one based on the lowest average execution time

Arguments

Type IntentOptional Attributes Name
type(dtfft_platform_t), intent(in) :: platform

Platform to use

integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

integer(kind=int32), intent(in) :: transposed_dims(:,:)

Transposed dimensions

type(MPI_Comm), intent(in) :: base_comm

3D comm

type(dtfft_effort_t), intent(in) :: effort

How thoroughly dtFFT searches for the optimal plan

type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

integer(kind=int8), intent(inout) :: best_forward_ids(:)

Best Datatype ids for forward plan

integer(kind=int8), intent(inout) :: best_backward_ids(:)

Best Datatype ids for backward plan

integer(kind=int32), intent(out) :: best_decomposition(:)

Best decomposition found

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: min_execution_time

Elapsed time for best plan selected

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected

private subroutine autotune_grid(platform, dims, transposed_dims, base_comm, effort, base_dtype, comm_dims, base_storage, stream, is_z_slab, best_forward_ids, best_backward_ids, backend, best_time, best_backend)

Creates cartesian grid and runs various backends on it. Returns best backend and execution time

Arguments

Type IntentOptional Attributes Name
type(dtfft_platform_t), intent(in) :: platform

Platform to create plan for

integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

integer(kind=int32), intent(in) :: transposed_dims(:,:)

Transposed dimensions

type(MPI_Comm), intent(in) :: base_comm

Basic communicator to create 3d grid from

type(dtfft_effort_t), intent(in) :: effort

How thoroughly dtFFT searches for the optimal plan

type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

integer(kind=int32), intent(in) :: comm_dims(:)

Number of processors in each dimension

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

logical, intent(in) :: is_z_slab

Is Z-slab optimization enabled

integer(kind=int8), intent(inout) :: best_forward_ids(:)

Best Datatype ids for forward plan

integer(kind=int8), intent(inout) :: best_backward_ids(:)

Best Datatype ids for backward plan

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: best_time

Elapsed time for best plan selected

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected for the grid

private subroutine run_autotune_backend(platform, comms, cart_comm, effort, base_dtype, pencils, base_storage, stream, is_z_slab, best_forward_ids, best_backward_ids, backend, best_time, best_backend)

Runs autotune for all backends. Symmetric heap can be allocated after nvshmem_init, which is done during plan creation.

Arguments

Type IntentOptional Attributes Name
type(dtfft_platform_t), intent(in) :: platform

Platform to create plan for

type(MPI_Comm), intent(in) :: comms(:)

1D comms

type(MPI_Comm), intent(in) :: cart_comm

3D Cartesian comm

type(dtfft_effort_t), intent(in) :: effort
type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

type(pencil), intent(in) :: pencils(:)

Source meta

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

logical, intent(in) :: is_z_slab

Is Z-slab optimization enabled

integer(kind=int8), intent(inout) :: best_forward_ids(:)

Best Datatype ids for forward plan

integer(kind=int8), intent(inout) :: best_backward_ids(:)

Best Datatype ids for backward plan

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: best_time

Elapsed time for best backend

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected

private subroutine run_autotune_datatypes(helper, base_dtype, pencils, base_storage, is_z_slab, best_forward_ids, best_backward_ids, a, b, elapsed_time)

Arguments

Type IntentOptional Attributes Name
type(backend_helper), intent(inout) :: helper
type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

type(pencil), intent(in) :: pencils(:)

Source meta

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store single element

logical, intent(in) :: is_z_slab

Is Z-slab optimization enabled

integer(kind=int8), intent(out) :: best_forward_ids(:)

Best Datatype ids for forward plan

integer(kind=int8), intent(out) :: best_backward_ids(:)

Best Datatype ids for backward plan

real(kind=real32), intent(inout) :: a(:)

Source buffer

real(kind=real32), intent(inout) :: b(:)

Target buffer

real(kind=real32), intent(out) :: elapsed_time

Elapsed time for best plans selected in [ms]

private subroutine alloc_and_set_aux(platform, helper, backend, cart_comm, aux, paux, plans, is_aux_alloc)

Allocates auxiliary memory according to the backend and sets it to the plans

Arguments

Type IntentOptional Attributes Name
type(dtfft_platform_t), intent(in) :: platform
type(backend_helper), intent(inout) :: helper

Backend helper

type(dtfft_backend_t), intent(in) :: backend

GPU backend

type(MPI_Comm), intent(in) :: cart_comm

Cartesian communicator

type(c_ptr), intent(inout) :: aux

Allocatable auxiliary memory

real(kind=real32), intent(inout), pointer :: paux(:)

Pointer to auxiliary memory

type(plan_t), intent(in) :: plans(:)
logical :: is_aux_alloc

Is auxiliary memory allocated

private subroutine create_pencils_and_comm(transposed_dims, old_comm, comm_dims, comm, local_comms, pencils, ipencil)

Creates cartesian communicator, its 1d communicators, and the pencils (data distributing meta)

Arguments

Type IntentOptional Attributes Name
integer(kind=int32), intent(in) :: transposed_dims(:,:)

Global counts in transposed coordinates

type(MPI_Comm), intent(in) :: old_comm

Communicator to create cartesian from

integer(kind=int32), intent(in) :: comm_dims(:)

Dims in cartesian communicator

type(MPI_Comm), intent(out) :: comm

Cartesian communicator

type(MPI_Comm), intent(out) :: local_comms(:)

1d communicators in cartesian communicator

type(pencil), intent(out) :: pencils(:)

Data distributing meta

type(pencil_init), intent(in), optional :: ipencil

Pencil passed by user

private subroutine create_cart_comm(old_comm, comm_dims, comm, local_comms, ipencil)

Creates cartesian communicator

Arguments

Type IntentOptional Attributes Name
type(MPI_Comm), intent(in) :: old_comm

Communicator to create cartesian from

integer(kind=int32), intent(in) :: comm_dims(:)

Dims in cartesian communicator

type(MPI_Comm), intent(out) :: comm

Cartesian communicator

type(MPI_Comm), intent(out) :: local_comms(:)

1d communicators in cartesian communicator

type(pencil_init), intent(in), optional :: ipencil

Pencil passed by user

private subroutine mem_alloc(self, comm, alloc_bytes, ptr, error_code)

Allocates memory based on selected backend

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(inout) :: self

Transposition class

type(MPI_Comm), intent(in) :: comm

MPI communicator

integer(kind=int64), intent(in) :: alloc_bytes

Number of bytes to allocate

type(c_ptr), intent(out) :: ptr

Pointer to the allocated memory

integer(kind=int32), intent(out) :: error_code

Error code

private subroutine mem_free(self, ptr, error_code)

Frees memory allocated with mem_alloc

Arguments

Type IntentOptional Attributes Name
class(transpose_plan), intent(inout) :: self

Transposition class

type(c_ptr), intent(in) :: ptr

Pointer to the memory to free

integer(kind=int32), intent(out) :: error_code

Error code

private subroutine alloc_mem(platform, helper, backend, comm, alloc_bytes, ptr, error_code)

Allocates memory based on backend

Arguments

Type IntentOptional Attributes Name
type(dtfft_platform_t), intent(in) :: platform
type(backend_helper), intent(inout) :: helper

Backend helper

type(dtfft_backend_t), intent(in) :: backend

GPU backend

type(MPI_Comm), intent(in) :: comm

MPI communicator

integer(kind=int64), intent(in) :: alloc_bytes

Number of bytes to allocate

type(c_ptr), intent(out) :: ptr

Pointer to the allocated memory

integer(kind=int32), intent(out) :: error_code

Error code

private subroutine free_mem(platform, helper, backend, ptr, error_code)

Frees memory based on backend

Arguments

Type IntentOptional Attributes Name
type(dtfft_platform_t), intent(in) :: platform
type(backend_helper), intent(inout) :: helper

Backend helper

type(dtfft_backend_t), intent(in) :: backend

GPU backend

type(c_ptr), intent(in) :: ptr

Pointer to the memory to free

integer(kind=int32), intent(out) :: error_code

Error code