This module describes the `transpose_plan_cuda` class.
Type | Visibility | Attributes | Name | Initial | |||
---|---|---|---|---|---|---|---|
real(kind=real32), | private, | parameter | :: | MaxR4P | = | huge(1._real32) |
Maximum value of real32 |
CUDA Transpose Plan
Type | Visibility | Attributes | Name | Initial | |||
---|---|---|---|---|---|---|---|
type(dtfft_backend_t), | public | :: | backend | = | DTFFT_BACKEND_MPI_DATATYPE |
GPU backend |
|
type(backend_helper), | public | :: | helper |
Backend helper |
|||
logical, | public | :: | is_z_slab |
Z-slab optimization flag (for 3D transforms) |
|||
integer(kind=int64), | public | :: | min_buffer_size |
Minimal buffer size for transposition |
|||
type(dtfft_stream_t), | private | :: | stream |
CUDA stream |
|||
type(c_ptr), | private | :: | aux |
Auxiliary memory |
|||
real(kind=real32), | private, | pointer | :: | paux(:) |
Pointer to auxiliary memory |
||
logical, | private | :: | is_aux_alloc |
Is auxiliary memory allocated |
|||
type(transpose_handle_cuda), | private, | allocatable | :: | fplans(:) |
Forward transposition plans |
||
type(transpose_handle_cuda), | private, | allocatable | :: | bplans(:) |
Backward transposition plans |
procedure, public, pass(self) :: create | Creates transposition plan |
procedure, public, pass(self) :: execute | Executes transposition |
procedure, public, non_overridable, pass(self) :: get_backend | Returns backend id |
procedure, public, non_overridable, pass(self) :: mem_alloc | Allocates memory based on selected backend |
procedure, public, non_overridable, pass(self) :: mem_free | Frees memory allocated with mem_alloc |
procedure, public :: create_private => create_cuda | Creates CUDA transpose plan |
procedure, public :: execute_private => execute_cuda | Executes single transposition |
procedure, public :: destroy => destroy_cuda | Destroys CUDA transpose plan |
Creates CUDA transpose plan
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(transpose_plan_cuda), | intent(inout) | :: | self |
GPU transpose plan |
||
integer(kind=int32), | intent(in) | :: | dims(:) |
Global sizes of the transform requested |
||
integer(kind=int32), | intent(in) | :: | transposed_dims(:,:) |
Transposed dimensions |
||
type(MPI_Comm), | intent(in) | :: | base_comm |
Base communicator |
||
integer(kind=int32), | intent(in) | :: | comm_dims(:) |
Number of processors in each dimension |
||
type(dtfft_effort_t), | intent(in) | :: | effort |
How thoroughly `dtFFT` searches for the optimal plan |
||
type(MPI_Datatype), | intent(in) | :: | base_dtype |
Base MPI_Datatype |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
logical, | intent(in) | :: | is_custom_cart_comm |
Whether a custom Cartesian communicator was provided by the user |
||
type(MPI_Comm), | intent(out) | :: | cart_comm |
Cartesian communicator |
||
type(MPI_Comm), | intent(out) | :: | comms(:) |
Array of 1d communicators |
||
type(pencil), | intent(out) | :: | pencils(:) |
Data distribution metadata |
Allocates auxiliary memory according to the backend and assigns it to the plans
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(backend_helper), | intent(inout) | :: | helper |
Backend helper |
||
type(dtfft_backend_t), | intent(in) | :: | backend |
GPU backend |
||
type(MPI_Comm), | intent(in) | :: | cart_comm |
Cartesian communicator |
||
type(c_ptr), | intent(inout) | :: | aux |
Allocatable auxiliary memory |
||
real(kind=real32), | intent(inout), | pointer | :: | paux(:) |
Pointer to auxiliary memory |
|
type(transpose_handle_cuda), | intent(inout) | :: | plans(:) |
Plans |
||
type(transpose_handle_cuda), | intent(inout), | optional | :: | bplans(:) |
Backward plans |
Return value: whether auxiliary memory has been allocated
Executes single transposition
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(transpose_plan_cuda), | intent(inout) | :: | self |
Transposition class |
||
real(kind=real32), | intent(inout) | :: | in(:) |
Incoming buffer |
||
real(kind=real32), | intent(inout) | :: | out(:) |
Resulting buffer |
||
type(dtfft_transpose_t), | intent(in) | :: | transpose_type |
Type of transpose to execute |
Destroys transposition plans
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(transpose_plan_cuda), | intent(inout) | :: | self |
Transposition class |
Runs through all possible grid decompositions and selects the best one based on the lowest average execution time
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
integer(kind=int32), | intent(in) | :: | dims(:) |
Global sizes of the transform requested |
||
integer(kind=int32), | intent(in) | :: | transposed_dims(:,:) |
Transposed dimensions |
||
type(MPI_Comm), | intent(in) | :: | base_comm |
3D comm |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
type(dtfft_stream_t), | intent(in) | :: | stream |
Stream to use |
||
integer(kind=int32), | intent(out) | :: | best_decomposition(:) |
Best decomposition found |
||
type(dtfft_backend_t), | intent(in), | optional | :: | backend |
GPU backend to test. Should be passed only when effort is `DTFFT_ESTIMATE` or `DTFFT_MEASURE` |
|
real(kind=real32), | intent(out), | optional | :: | min_execution_time |
Elapsed time for best plan selected |
|
type(dtfft_backend_t), | intent(out), | optional | :: | best_backend |
Best backend selected |
Creates cartesian grid and runs various backends on it. Can return best backend and execution time
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
integer(kind=int32), | intent(in) | :: | dims(:) |
Global sizes of the transform requested |
||
integer(kind=int32), | intent(in) | :: | transposed_dims(:,:) |
Transposed dimensions |
||
type(MPI_Comm), | intent(in) | :: | base_comm |
Basic communicator to create 3d grid from |
||
integer(kind=int32), | intent(in) | :: | comm_dims(:) |
Number of processors in each dimension |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
logical, | intent(in) | :: | is_z_slab |
Is Z-slab optimization enabled |
||
type(dtfft_stream_t), | intent(in) | :: | stream |
Stream to use |
||
type(dtfft_backend_t), | intent(in), | optional | :: | backend |
GPU backend to test. Should be passed only when effort is `DTFFT_ESTIMATE` or `DTFFT_MEASURE` |
|
real(kind=real32), | intent(out), | optional | :: | best_time |
Elapsed time for best plan selected |
|
type(dtfft_backend_t), | intent(out), | optional | :: | best_backend |
Best backend selected for the grid |
Runs autotune for all backends
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(MPI_Comm), | intent(in) | :: | comms(:) |
1D comms |
||
type(MPI_Comm), | intent(in) | :: | cart_comm |
3D Cartesian comm |
||
type(pencil), | intent(in) | :: | pencils(:) |
Source meta |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
type(dtfft_stream_t), | intent(in) | :: | stream |
Stream to use |
||
logical, | intent(in) | :: | is_z_slab |
Is Z-slab optimization enabled |
||
type(dtfft_backend_t), | intent(in), | optional | :: | backend |
GPU backend to test. Should be passed only when effort is `DTFFT_ESTIMATE` or `DTFFT_MEASURE` |
|
real(kind=real32), | intent(out), | optional | :: | best_time |
Elapsed time for best backend |
|
type(dtfft_backend_t), | intent(out), | optional | :: | best_backend |
Best backend selected |