Procedures

Procedure Location Procedure Type Description
add dtfft_nvrtc_kernel_cache Subroutine

Adds new entry to cache

add_line dtfft_nvrtc_kernel_generator Subroutine

Adds new line to CUDA code

aligned_alloc dtfft_utils Interface
alloc_and_set_aux dtfft_transpose_plan_cuda Function

Allocates auxiliary memory according to the backend and sets it to the plans

alloc_fft_plans dtfft_plan Subroutine

Allocates abstract_executor with required FFT class and populates fft_mapping with similar FFT ids

alloc_mem dtfft_abstract_transpose_plan Subroutine

Allocates memory based on backend

astring_f2c dtfft_utils Subroutine

Convert Fortran string to C allocatable string

autotune_grid dtfft_transpose_plan_host Subroutine

Creates cartesian communicator and executes various datatypes on it

autotune_grid dtfft_transpose_plan_cuda Subroutine

Creates cartesian grid and runs various backends on it. Can return best backend and execution time

autotune_grid_decomposition dtfft_transpose_plan_host Subroutine

Runs through all possible grid decompositions and selects the best one based on the lowest average execution time

autotune_grid_decomposition dtfft_transpose_plan_cuda Subroutine

Runs through all possible grid decompositions and selects the best one based on the lowest average execution time

autotune_mpi_datatypes dtfft_transpose_plan_host Subroutine
autotune_transpose_id dtfft_transpose_plan_host Function

Creates forward and backward transpose plans based on source and target data distribution, executes them DTFFT_MEASURE_ITERS times ( 4 * DTFFT_MEASURE_ITERS iterations total )

Read more…
check_aux dtfft_plan Subroutine

Checks if aux buffer was passed by user and if not will allocate one internally

check_continuity dtfft_pencil Function

Check if the local pencils cover the global space without gaps

check_create_args dtfft_plan Function

Check arguments provided by user and sets private variables

check_device_pointers dtfft_plan Function

Checks if device pointers are provided by user

check_if_even dtfft_pencil Function

Checks if data is evenly distributed across processes

check_overlap dtfft_pencil Function

Check if two pencils overlap in ndims-dimensional space

cleanup dtfft_nvrtc_kernel_cache Subroutine

Removes unused modules from cuda context

Comm_f2c dtfft_utils Interface
compile_and_cache dtfft_nvrtc_kernel Function

Compiles kernel stored in code and caches pointer to CUfunction

config_constructor dtfft_config Function

Creates a new configuration

count_bank_conflicts dtfft_nvrtc_block_optimizer Function

Counts bank conflicts for a given tile size, padding, element size, and block rows.

count_unique dtfft_utils Function

Count the number of unique elements in the array

create dtfft_pencil Subroutine

Creates pencil

create dtfft_abstract_executor Function

Creates FFT plan

create dtfft_executor_fftw_m Subroutine

Creates FFT plan via FFTW3 Interface

create dtfft_nvrtc_kernel Subroutine

Creates kernel

create dtfft_executor_vkfft_m Subroutine

Creates FFT plan via vkFFT Interface

create dtfft_abstract_transpose_plan Function

Creates transposition plans

create dtfft_transpose_handle_host Subroutine

Creates transpose_handle_host class

create dtfft_executor_mkl_m Subroutine

Creates FFT plan via MKL DFTI Interface

create dtfft_transpose_handle_cuda Subroutine

Creates CUDA Transpose Handle

create dtfft_abstract_backend Subroutine

Creates Abstract GPU Backend

create dtfft_executor_cufft_m Subroutine

Creates FFT plan via cuFFT Interface

create dtfft_nvrtc_kernel_cache Subroutine

Creates cache

create dtfft_backend_cufftmp_m Subroutine

Creates cuFFTMp GPU Backend

create_1d_comm dtfft_pencil Subroutine

Creates a new 1D communicator based on the fixed dimensions of the current pencil

create_c2c dtfft_plan Subroutine

C2C Plan Constructor

create_c2c_core dtfft_plan Function

Creates plan for both C2C and R2C

create_c2c_internal dtfft_plan Function

Private method that combines common logic for C2C plan creation

create_c2c_pencil dtfft_plan Subroutine

C2C Plan Constructor

create_cart_comm dtfft_abstract_transpose_plan Subroutine

Creates cartesian communicator

create_cuda dtfft_transpose_plan_cuda Function

Creates CUDA transpose plan

create_data_handle dtfft_transpose_handle_cuda Subroutine

Creates handle

create_device_pointer dtfft_nvrtc_kernel Subroutine

Allocates memory on a device and copies values to it.

create_handle dtfft_transpose_handle_host Subroutine

Creates transposition handle

create_helper dtfft_abstract_backend Subroutine

Creates helper

create_helper dtfft_backend_mpi Subroutine

Creates MPI helper

create_mpi dtfft_backend_mpi Subroutine

Creates MPI backend

create_nccl dtfft_backend_nccl_m Subroutine

Creates NCCL backend

create_nvtx_domain dtfft_interface_nvtx Subroutine

Creates a new NVTX domain

create_pencil_init dtfft_pencil Function

Creates and validates pencil passed by user to plan constructors

create_pencil_t dtfft_pencil Function

Creates pencil object, that can be used to create dtFFT plans

create_pencils_and_comm dtfft_abstract_transpose_plan Subroutine

Creates cartesian communicator

create_private dtfft_transpose_plan_host Function

Creates transposition plans

create_private dtfft_plan Function

Creates core

create_r2c dtfft_plan Subroutine

R2C Generic Plan Constructor

create_r2c_internal dtfft_plan Function

Private method that combines common logic for R2C plan creation

create_r2c_pencil dtfft_plan Subroutine

R2C Plan Constructor with pencil

create_r2r dtfft_plan Subroutine

R2R Plan Constructor

create_r2r_internal dtfft_plan Function

Creates plan for R2R plans

create_r2r_pencil dtfft_plan Subroutine

R2R Plan Constructor

create_transpose_2d dtfft_transpose_handle_host Subroutine

Creates two-dimensional transposition datatypes

create_transpose_XY dtfft_transpose_handle_host Subroutine

Creates three-dimensional X –> Y, Y –> X transposition datatypes

create_transpose_XZ dtfft_transpose_handle_host Subroutine

Creates three-dimensional X –> Z transposition datatypes. Can only be used with 3D slab decomposition when slabs are distributed in Z direction

create_transpose_YZ dtfft_transpose_handle_host Subroutine

Creates three-dimensional Y –> Z, Z –> Y transposition datatypes

create_transpose_ZX dtfft_transpose_handle_host Subroutine

Creates three-dimensional Z –> X transposition datatypes. Can only be used with 3D slab decomposition when slabs are distributed in Z direction

cudaDeviceSynchronize dtfft_interface_cuda_runtime Interface
cudaEventCreate dtfft_interface_cuda_runtime Interface
cudaEventCreateWithFlags dtfft_interface_cuda_runtime Interface
cudaEventDestroy dtfft_interface_cuda_runtime Interface
cudaEventElapsedTime dtfft_interface_cuda_runtime Interface
cudaEventRecord dtfft_interface_cuda_runtime Interface
cudaEventSynchronize dtfft_interface_cuda_runtime Interface
cudaFree dtfft_interface_cuda_runtime Interface
cudaGetDevice dtfft_interface_cuda_runtime Interface
cudaGetDeviceCount dtfft_interface_cuda_runtime Interface
cudaGetErrorString dtfft_interface_cuda_runtime Function

Helper function that returns a string describing the given CUDA error code. If the error code is not recognized, “unrecognized error code” is returned.

cudaGetErrorString_c dtfft_interface_cuda_runtime Interface
cudaGetLastError dtfft_interface_cuda_runtime Interface
cudaMalloc dtfft_interface_cuda_runtime Interface
cudaMemcpy dtfft_interface_cuda_runtime Interface

Copies data synchronously between host and device.

cudaMemcpyAsync dtfft_interface_cuda_runtime Interface

Copies data asynchronously between host and device.

cudaMemGetInfo dtfft_interface_cuda_runtime Interface
cudaMemset dtfft_interface_cuda_runtime Interface
cudaSetDevice dtfft_interface_cuda_runtime Interface
cudaStreamCreate dtfft_interface_cuda_runtime Interface
cudaStreamDestroy dtfft_interface_cuda_runtime Interface
cudaStreamQuery dtfft_interface_cuda_runtime Interface
cudaStreamSynchronize dtfft_interface_cuda_runtime Interface
cudaStreamWaitEvent dtfft_interface_cuda_runtime Interface
cufftDestroy dtfft_interface_cufft Interface

Frees all GPU resources associated with a cuFFT plan and destroys the internal plan data structure.

cufftGetErrorString dtfft_interface_cufft Function

Returns a string representation of the cuFFT error code.

cufftMpAttachReshapeComm dtfft_interface_cufft Interface

Attaches a communication handle to a reshape. This function is not collective.

cufftMpCreateReshape dtfft_interface_cufft Interface

Initializes a reshape handle for future use. This function is not collective.

cufftMpDestroyReshape dtfft_interface_cufft Interface

Destroys a reshape and all its associated data.

cufftMpExecReshapeAsync dtfft_interface_cufft Interface

Executes the reshape, redistributing data_in into data_out using the workspace in workspace.

cufftMpGetReshapeSize dtfft_interface_cufft Interface

Returns the amount (in bytes) of workspace required to execute the handle.

cufftMpMakeReshape dtfft_interface_cufft Interface

Creates a reshape intended to re-distribute a global array of 3D data.

cufftPlanMany dtfft_interface_cufft Interface

Creates a FFT plan configuration of dimension rank, with sizes specified in the array n.

cufftSetStream dtfft_interface_cufft Interface

Associates a CUDA stream with a cuFFT plan.

cufftXtExec dtfft_interface_cufft Interface

Executes any cuFFT transform regardless of precision and type. In case of complex-to-real and real-to-complex transforms, the direction parameter is ignored.

cuLaunchKernel dtfft_interface_cuda Function

Launches a CUDA kernel

destoy_helper dtfft_backend_mpi Subroutine

Destroys MPI helper

destroy dtfft_pencil Subroutine

Destroys pencil

destroy dtfft_abstract_executor Subroutine

Destroys plan

destroy dtfft_executor_fftw_m Subroutine

Destroys FFTW3 plan

destroy dtfft_nvrtc_kernel Subroutine

Destroys kernel

destroy dtfft_executor_vkfft_m Subroutine

Destroys vkFFT plan

destroy dtfft_transpose_handle_host Subroutine

Destroys transpose_handle_host class

destroy dtfft_transpose_plan_host Subroutine

Destroys transposition plans

destroy dtfft_executor_mkl_m Subroutine

Destroys MKL plan

destroy dtfft_transpose_handle_cuda Subroutine

Destroys CUDA Transpose Handle

destroy dtfft_abstract_backend Subroutine

Destroys Abstract GPU Backend

destroy dtfft_executor_cufft_m Subroutine

Destroys cuFFT plan

destroy dtfft_plan Subroutine

Destroys plan, frees all memory

destroy dtfft_backend_cufftmp_m Subroutine

Destroys cuFFTMp GPU Backend

destroy_code dtfft_nvrtc_kernel_generator Subroutine

Frees all memory

destroy_cuda dtfft_transpose_plan_cuda Subroutine

Destroys transposition plans

destroy_data_handle dtfft_transpose_handle_cuda Subroutine

Destroys handle

destroy_handle dtfft_transpose_handle_host Subroutine

Destroys transposition handle

destroy_helper dtfft_abstract_backend Subroutine

Destroys helper

destroy_mpi dtfft_backend_mpi Subroutine

Destroys MPI backend

destroy_nccl dtfft_backend_nccl_m Subroutine

Destroys NCCL backend

destroy_pencil_init dtfft_pencil Subroutine

Destroys pencil_init

destroy_pencil_t dtfft_pencil Subroutine

Destroys pencil

destroy_pencil_t_private dtfft_pencil Subroutine

Destroys pencil

destroy_stream dtfft_config Subroutine

Destroy the default stream if it was created

destroy_string dtfft_utils Subroutine
destroy_strings dtfft_utils Subroutine

Destroys array of string objects

DftiErrorMessage dtfft_interface_mkl_m Function

Generates an error message.

DftiErrorMessage_c dtfft_interface_mkl_m Interface
dl_error dtfft_utils Subroutine

Writes error message to the error unit

dlclose dtfft_utils Interface
dlerror dtfft_utils Interface
dlopen dtfft_utils Interface
dlsym dtfft_utils Interface
double_to_string dtfft_utils Function

Convert double to string

dtfft_config_t dtfft_config Interface

Interface to create a new configuration

dtfft_create_config dtfft_config Subroutine

Creates a new configuration and sets default values.

Read more…
dtfft_create_plan_c2c_c dtfft_api Function

Creates C2C dtFFT Plan, allocates all structures and prepares FFT, C/C++ interface

dtfft_create_plan_c2c_pencil_c dtfft_api Function

Creates C2C dtFFT plan from Pencil, allocates all structures and prepares FFT, C/C++/Python interface

dtfft_create_plan_r2r_c dtfft_api Function

Creates R2R dtFFT Plan, allocates all structures and prepares FFT, C/C++/Python interface

dtfft_create_plan_r2r_pencil_c dtfft_api Function

Creates R2R dtFFT Plan from Pencil, allocates all structures and prepares FFT, C/C++/Python interface

dtfft_destroy_c dtfft_api Function

Destroys dtFFT Plan, C/C++ interface

dtfft_execute_c dtfft_api Function

Executes dtFFT Plan, C/C++ interface. aux can be NULL.

dtfft_get_alloc_bytes_c dtfft_api Function

Returns minimum number of bytes required to execute plan, C/C++ interface

dtfft_get_alloc_size_c dtfft_api Function

Returns minimum number of elements to be allocated for in and out buffers, C/C++ interface

dtfft_get_backend_c dtfft_api Function

Returns selected dtfft_backend_t during autotuning

dtfft_get_backend_string dtfft_parameters Function

Gets the string description of a GPU backend

dtfft_get_backend_string_c dtfft_api Subroutine

Returns string representation of dtfft_backend_t

dtfft_get_cuda_stream dtfft_parameters Function

Returns the CUDA stream from dtfft_stream_t

dtfft_get_dims_c dtfft_api Function

Returns dimensions of plan, C/C++ interface

dtfft_get_element_size_c dtfft_api Function

Returns size of element in bytes, C/C++ interface

dtfft_get_error_string dtfft_errors Function

Gets the string description of an error code

dtfft_get_error_string_c dtfft_api Subroutine

Returns an explanation of error_code that could have been previously returned by one of dtFFT API calls, C/C++ interface

dtfft_get_executor_c dtfft_api Function

Returns executor type used in plan, C/C++ interface

dtfft_get_executor_string dtfft_parameters Function

Gets the string description of an executor

dtfft_get_executor_string_c dtfft_api Subroutine
dtfft_get_local_sizes_c dtfft_api Function

Returns local sizes, counts in real and Fourier spaces and number of elements to be allocated for in and out buffers, C/C++ interface.

dtfft_get_pencil_c dtfft_api Function

Returns pencil decomposition info, C/C++ interface

dtfft_get_platform_c dtfft_api Function

Returns selected dtfft_platform_t during autotuning

dtfft_get_precision_c dtfft_api Function

Returns precision used in plan, C/C++ interface

dtfft_get_precision_string dtfft_parameters Function

Gets the string description of a precision

dtfft_get_precision_string_c dtfft_api Subroutine
dtfft_get_stream_c dtfft_api Function

Returns Stream associated with plan

dtfft_get_version dtfft_parameters Interface

Get dtFFT version

dtfft_get_version_current dtfft_parameters Function

Returns the current version code

dtfft_get_version_required dtfft_parameters Function

Returns the version code required by the user

dtfft_get_z_slab_enabled_c dtfft_api Function

Checks if dtFFT Plan is using Z-slab optimization

dtfft_mem_alloc_c dtfft_api Function

Allocates memory for dtFFT Plan, C/C++ interface

dtfft_mem_free_c dtfft_api Function

Frees memory for dtFFT Plan, C/C++ interface

dtfft_pencil_t dtfft_pencil Interface

Type bound constructor for dtfft_pencil_t

dtfft_report_c dtfft_api Function

Reports dtFFT Plan, C/C++ interface

dtfft_set_config dtfft_config Subroutine

Sets configuration parameters

dtfft_set_config_c dtfft_api Function

Sets dtFFT configuration, C/C++ interface

dtfft_stream_t dtfft_parameters Interface

Creates dtfft_stream_t from integer(cuda_stream_kind)

dtfft_transpose_c dtfft_api Function

Executes single transposition, C/C++ interface.

dynamic_load dtfft_utils Function

Dynamically loads library and its symbols

effort_eq dtfft_parameters Function
effort_ne dtfft_parameters Function
estimate_bank_conflict_ratio dtfft_nvrtc_block_optimizer Function

Estimates the bank conflict ratio for a given kernel configuration

estimate_coalescing dtfft_nvrtc_block_optimizer Function

Estimate memory coalescing efficiency for a given kernel configuration and transpose type

estimate_memory_pressure dtfft_nvrtc_block_optimizer Function

Analytical estimation of memory pressure based on GPU architecture

estimate_occupancy dtfft_nvrtc_block_optimizer Function

Calculates theoretical occupancy for a given kernel configuration

estimate_optimal_padding dtfft_nvrtc_block_optimizer Function

Estimates the optimal padding for a given tile size and element size

evaluate_analytical_performance dtfft_nvrtc_block_optimizer Function

This function evaluates the performance of a kernel configuration based on various architectural and problem-specific parameters.

execute dtfft_abstract_executor Subroutine

Executes plan

execute dtfft_executor_fftw_m Subroutine

Executes FFTW3 plan

execute dtfft_nvrtc_kernel Subroutine

Executes kernel on stream

execute dtfft_executor_vkfft_m Subroutine

Executes vkFFT plan

execute dtfft_abstract_transpose_plan Subroutine

Executes single transposition

execute dtfft_transpose_handle_host Subroutine

Executes transposition

execute dtfft_executor_mkl_m Subroutine

Executes MKL plan

execute dtfft_transpose_handle_cuda Subroutine

Executes transpose - exchange - unpack

execute dtfft_abstract_backend Subroutine

Executes GPU Backend

execute dtfft_executor_cufft_m Subroutine

Executes cuFFT plan

execute dtfft_plan Subroutine

Executes plan

execute dtfft_backend_cufftmp_m Subroutine

Executes cuFFTMp GPU Backend

execute_cuda dtfft_transpose_plan_cuda Subroutine

Executes single transposition

execute_mpi dtfft_backend_mpi Subroutine

Executes MPI backend

execute_nccl dtfft_backend_nccl_m Subroutine

Executes NCCL backend

execute_private dtfft_transpose_plan_host Subroutine

Executes single transposition

execute_private dtfft_plan Subroutine

Executes plan with specified auxiliary buffer

execute_ptr dtfft_plan Subroutine

Executes plan using type(c_ptr) pointers instead of buffers

execute_type_eq dtfft_parameters Function
execute_type_ne dtfft_parameters Function
executor_eq dtfft_parameters Function
executor_ne dtfft_parameters Function
fftw_execute_dft dtfft_interface_fftw_m Interface
fftw_execute_dft_c2r dtfft_interface_fftw_m Interface
fftw_execute_dft_r2c dtfft_interface_fftw_m Interface
fftw_execute_r2r dtfft_interface_fftw_m Interface
fftw_plan_many_dft dtfft_interface_fftw_m Interface
fftw_plan_many_dft_c2r dtfft_interface_fftw_m Interface
fftw_plan_many_dft_r2c dtfft_interface_fftw_m Interface
fftw_plan_many_r2r dtfft_interface_fftw_m Interface
fftwf_execute_dft dtfft_interface_fftw_m Interface
fftwf_execute_dft_c2r dtfft_interface_fftw_m Interface
fftwf_execute_dft_r2c dtfft_interface_fftw_m Interface
fftwf_execute_r2r dtfft_interface_fftw_m Interface
fftwf_plan_many_dft dtfft_interface_fftw_m Interface
fftwf_plan_many_dft_c2r dtfft_interface_fftw_m Interface
fftwf_plan_many_dft_r2c dtfft_interface_fftw_m Interface
fftwf_plan_many_r2r dtfft_interface_fftw_m Interface
find_valid_combination dtfft_nvrtc_block_optimizer Subroutine

This subroutine optimizes the tile size and number of rows for narrow matrices by adjusting them to be compatible with the warp size.

float_to_string dtfft_utils Function

Convert float to string

free_datatypes dtfft_transpose_handle_host Subroutine

Frees temporary datatypes

free_mem dtfft_abstract_transpose_plan Subroutine

Frees memory based on backend

generate_candidates dtfft_nvrtc_block_optimizer Subroutine

Generate kernel configuration candidates for given problem

get dtfft_nvrtc_kernel_cache Function

Returns cached kernel if it exists. If not returns null pointer.

get_alloc_bytes dtfft_plan Function

Returns minimum number of bytes required to execute plan

get_alloc_size dtfft_plan Function

Wrapper around get_local_sizes to obtain number of elements only

get_ampere_architecture dtfft_nvrtc_block_optimizer Function

Ampere architecture (Compute Capability 8.0)

get_aux_size dtfft_abstract_transpose_plan Function
get_aux_size dtfft_transpose_plan_cuda Function
get_aux_size dtfft_transpose_handle_cuda Function

Returns number of bytes required by aux buffer

get_aux_size dtfft_abstract_backend Function

Returns number of bytes required by aux buffer

get_backend dtfft_abstract_transpose_plan Function

Returns plan GPU backend

get_backend dtfft_plan Function

Returns selected GPU backend during autotuning

get_code_init dtfft_nvrtc_kernel_generator Subroutine

Generates basic code that is used in all other kernels

get_comm dtfft_api Function
get_conf_backend dtfft_config Function

Returns GPU backend set by the user or default one

get_conf_configs_to_test dtfft_config Function

Returns the number of configurations to test

get_conf_forced_kernel_optimization dtfft_config Function

Whether forced kernel optimization is enabled or not

get_conf_internal dtfft_config Interface

Returns value from configuration unless environment variable is set

get_conf_internal_int32 dtfft_config Function

Returns value from configuration unless environment variable is set

get_conf_internal_logical dtfft_config Function

Returns value from configuration unless environment variable is set

get_conf_kernel_optimization_enabled dtfft_config Function

Whether kernel optimization is enabled or not

get_conf_log_enabled dtfft_config Function

Whether logging is enabled or not

get_conf_measure_iters dtfft_config Function

Returns the number of measurement iterations

get_conf_measure_warmup_iters dtfft_config Function

Returns the number of warmup iterations

get_conf_mpi_enabled dtfft_config Function

Whether MPI backends are enabled or not

get_conf_nccl_enabled dtfft_config Function

Whether NCCL backends are enabled or not

get_conf_nvshmem_enabled dtfft_config Function

Whether nvshmem backends are enabled or not

get_conf_pipelined_enabled dtfft_config Function

Whether pipelined backends are enabled or not

get_conf_platform dtfft_config Function

Returns platform set by the user or default one

get_conf_stream dtfft_config Function

Returns either the custom stream provided by user or creates a new one

get_conf_z_slab_enabled dtfft_config Function

Whether Z-slab optimization is enabled or not

get_contiguous_execution_blocks dtfft_nvrtc_kernel Subroutine

Gets the number of blocks and threads for a contiguous execution

get_datatype_from_env dtfft_config Function

Obtains datatype id from environment variable

get_device_props dtfft_interface_cuda_runtime Interface
get_dims dtfft_plan Subroutine

Returns global dimensions

get_element_size dtfft_plan Function

Returns number of bytes required to store single element.

get_env dtfft_config Interface

Obtains environment variable

get_env_base dtfft_config Function

Base function of obtaining dtFFT environment variable

get_env_int32 dtfft_config Function

Base Integer function of obtaining dtFFT environment variable

get_env_int8 dtfft_config Function

Obtains int8 environment variable

get_env_logical dtfft_config Function

Obtains logical environment variable

get_env_string dtfft_config Function

Obtains string environment variable

get_executor dtfft_plan Function

Returns FFT Executor associated with plan

get_inverse_kind dtfft_utils Function

Get the inverse R2R kind of transform for the given R2R kind

get_kernel dtfft_nvrtc_kernel Subroutine

Compiles kernel and caches it. Returns compiled kernel.

get_kernel_args dtfft_nvrtc_kernel Subroutine

Populates kernel arguments based on kernel type

get_local_size dtfft_pencil Subroutine

Computes local portions of data based on global count and position inside grid communicator

get_local_sizes dtfft_pencil Subroutine

Obtain local starts and counts in real and fourier spaces

get_local_sizes dtfft_plan Subroutine

Obtain local starts and counts in real and fourier spaces

get_neighbor_function_code dtfft_nvrtc_kernel_generator Subroutine

Generates device function that is used to determine the id of the process to which data is being sent or from which data has been received, based on local element coordinate

get_pencil dtfft_plan Function

Returns pencil decomposition

get_plan_execution_time dtfft_transpose_plan_host Function

Creates transpose plan and executes it DTFFT_MEASURE_WARMUP_ITERS + DTFFT_MEASURE_ITERS times

Read more…
get_platform dtfft_plan Function

Returns execution platform of the plan (HOST or CUDA)

get_precision dtfft_plan Function

Returns precision of the plan

get_stream_int64 dtfft_plan Subroutine

Returns CUDA stream associated with plan

get_stream_ptr dtfft_plan Subroutine

Returns CUDA stream associated with plan

get_transpose_kernel dtfft_nvrtc_kernel Subroutine
get_transpose_kernel_code dtfft_nvrtc_kernel_generator Function

Generates code that will be used to locally transpose data and prepares to send it to other processes ndims == 2

get_transpose_type dtfft_pencil Function

Determines transpose ID based on pencils

get_true_transpose_type dtfft_nvrtc_kernel_cache Function

Returns generic transpose id. Since X-Y and Y-Z transpositions are symmetric, it returns only one of them. X-Z and Z-X are not symmetric

get_unpack_kernel_code dtfft_nvrtc_kernel_generator Function

Generates code that will be used to unpack data when it is received

get_unpack_pipelined_kernel_code dtfft_nvrtc_kernel_generator Function

Generates code that will be used to partially unpack data when it is received from other process

get_varying_dim dtfft_pencil Function
get_volta_architecture dtfft_nvrtc_block_optimizer Function

Volta architecture (Compute Capability 7.0)

get_z_slab_enabled dtfft_plan Function

Returns logical value indicating whether Z-slab optimization is enabled internally

gpu_backend_eq dtfft_parameters Function
gpu_backend_ne dtfft_parameters Function
init_environment dtfft_config Subroutine
init_internal dtfft_config Function

Checks if MPI is initialized and loads environment variables

int32_to_string dtfft_utils Function

Convert 32-bit integer to string

int64_to_string dtfft_utils Function

Convert 64-bit integer to string

int8_to_string dtfft_utils Function

Convert 8-bit integer to string

is_backend_cufftmp dtfft_parameters Function
is_backend_mpi dtfft_parameters Function
is_backend_nccl dtfft_parameters Function
is_backend_nvshmem dtfft_parameters Function
is_backend_pipelined dtfft_parameters Function
is_cuda_executor dtfft_parameters Function
is_device_ptr dtfft_utils Interface
is_host_executor dtfft_parameters Function
is_null_funptr dtfft_utils Function

Checks if pointer is NULL

is_null_ptr dtfft_utils Function

Checks if pointer is NULL

is_null_ptr dtfft_utils Interface

Checks if pointer is NULL

is_nvshmem_ptr dtfft_interface_nvshmem Function

Checks if pointer is a symmetric nvshmem allocated pointer

is_same_ptr dtfft_utils Function

Checks if two pointers are the same

is_transpose_kernel dtfft_parameters Function
is_unpack_kernel dtfft_parameters Function
is_valid_comm_type dtfft_parameters Function
is_valid_dimension dtfft_parameters Function
is_valid_effort dtfft_parameters Function
is_valid_execute_type dtfft_parameters Function
is_valid_executor dtfft_parameters Function
is_valid_gpu_backend dtfft_parameters Function
is_valid_platform dtfft_parameters Function
is_valid_precision dtfft_parameters Function
is_valid_r2r_kind dtfft_parameters Function
is_valid_transpose_type dtfft_parameters Function
kernel_type_eq dtfft_parameters Function
kernel_type_ne dtfft_parameters Function
load dtfft_interface_vkfft_m Function

Loads VkFFT library

load_cuda dtfft_interface_cuda Function

Loads the CUDA Driver library and needed symbols

load_library dtfft_utils Function

Dynamically loads library

load_nvrtc dtfft_interface_nvrtc Function

Dynamically loads nvRTC library and its functions

load_symbol dtfft_utils Function

Dynamically loads symbol from library

load_vkfft dtfft_interface_vkfft_m Function

Loads VkFFT library based on the platform

make_plan dtfft_executor_mkl_m Subroutine

Creates general MKL plan

make_public dtfft_pencil Function

Creates public object that users can use to create own FFT backends

mem_alloc dtfft_executor_fftw_m Subroutine

Allocates FFTW3 memory

mem_alloc dtfft_executor_vkfft_m Subroutine

Dummy method. Raises error stop

mem_alloc dtfft_abstract_transpose_plan Subroutine

Allocates memory based on selected backend

mem_alloc dtfft_executor_mkl_m Subroutine

Allocates MKL memory

mem_alloc dtfft_executor_cufft_m Subroutine

Dummy method. Raises error stop

mem_alloc_c32_1d dtfft_plan Subroutine

Allocates pointer of rank 1

mem_alloc_c32_2d dtfft_plan Subroutine

Allocates pointer of rank 2

mem_alloc_c32_3d dtfft_plan Subroutine

Allocates pointer of rank 3

mem_alloc_c64_1d dtfft_plan Subroutine

Allocates pointer of rank 1

mem_alloc_c64_2d dtfft_plan Subroutine

Allocates pointer of rank 2

mem_alloc_c64_3d dtfft_plan Subroutine

Allocates pointer of rank 3

mem_alloc_host dtfft_utils Function

Allocates memory using C11 Standard aligned_alloc with 16 bytes alignment

mem_alloc_ptr dtfft_plan Subroutine

Allocates memory specific for this plan

mem_alloc_r32_1d dtfft_plan Subroutine

Allocates pointer of rank 1

mem_alloc_r32_2d dtfft_plan Subroutine

Allocates pointer of rank 2

mem_alloc_r32_3d dtfft_plan Subroutine

Allocates pointer of rank 3

mem_alloc_r64_1d dtfft_plan Subroutine

Allocates pointer of rank 1

mem_alloc_r64_2d dtfft_plan Subroutine

Allocates pointer of rank 2

mem_alloc_r64_3d dtfft_plan Subroutine

Allocates pointer of rank 3

mem_free dtfft_executor_fftw_m Subroutine

Frees FFTW3 aligned memory

mem_free dtfft_executor_vkfft_m Subroutine

Dummy method. Raises error stop

mem_free dtfft_abstract_transpose_plan Subroutine

Frees memory allocated with mem_alloc

mem_free dtfft_executor_mkl_m Subroutine

Frees MKL aligned memory

mem_free dtfft_executor_cufft_m Subroutine

Dummy method. Raises error stop

mem_free_c32_1d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_c32_2d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_c32_3d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_c64_1d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_c64_2d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_c64_3d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_host dtfft_utils Interface
mem_free_ptr dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_r32_1d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_r32_2d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_r32_3d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_r64_1d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_r64_2d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mem_free_r64_3d dtfft_plan Subroutine

Frees previously allocated memory specific for this plan

mkl_dfti_commit_desc dtfft_interface_mkl_m Interface
mkl_dfti_create_desc dtfft_interface_mkl_m Interface
mkl_dfti_execute dtfft_interface_mkl_m Interface
mkl_dfti_free_desc dtfft_interface_mkl_m Interface
mkl_dfti_mem_alloc dtfft_interface_mkl_m Interface
mkl_dfti_mem_free dtfft_interface_mkl_m Interface
mkl_dfti_set_value dtfft_interface_mkl_m Interface

Sets one particular configuration parameter with the specified configuration value.

ncclCommDeregister dtfft_interface_nccl Interface

Deregister a buffer for collective communication.

ncclCommDestroy dtfft_interface_nccl Interface

Destroy a communicator object comm.

ncclCommInitRank dtfft_interface_nccl Interface

Creates a new communicator (multi thread/process version).

Read more…
ncclCommRegister dtfft_interface_nccl Interface

Register a buffer for collective communication.

ncclGetErrorString dtfft_interface_nccl Function

Generates an error message.

ncclGetErrorString_c dtfft_interface_nccl Interface

Returns a human-readable string corresponding to the passed error code.

ncclGetUniqueId dtfft_interface_nccl Interface

Generates an Id to be used in ncclCommInitRank. ncclGetUniqueId should be called once when creating a communicator and the Id should be distributed to all ranks in the communicator before calling ncclCommInitRank. uniqueId should point to a ncclUniqueId object allocated by the user.

ncclGroupEnd dtfft_interface_nccl Interface

End a group call.

Read more…
ncclGroupStart dtfft_interface_nccl Interface

Start a group call.

Read more…
ncclMemAlloc dtfft_interface_nccl Interface

Allocate a GPU buffer with size. Allocated buffer head address will be returned by ptr, and the actual allocated size can be larger than requested because of the buffer granularity requirements from all types of NCCL optimizations.

ncclMemFree dtfft_interface_nccl Interface

Free memory allocated by ncclMemAlloc().

ncclRecv dtfft_interface_nccl Interface

Receive data from rank peer into recvbuff.

Read more…
ncclSend dtfft_interface_nccl Interface

Send data from sendbuff to rank peer.

Read more…
nvrtcGetErrorString dtfft_interface_nvrtc Function

Helper function that returns a string describing the given nvrtcResult code. For unrecognized enumeration values, it returns “NVRTC_ERROR unknown”.

nvshmem_free dtfft_interface_nvshmem Interface
nvshmem_malloc dtfft_interface_nvshmem Interface
nvshmem_my_pe dtfft_interface_nvshmem Interface
nvshmem_ptr dtfft_interface_nvshmem Interface
nvshmemx_float_alltoall_on_stream dtfft_interface_nvshmem Interface
nvshmemx_init_status dtfft_interface_nvshmem Interface
nvshmemx_sync_all_on_stream dtfft_interface_nvshmem Interface
nvtxDomainCreate_c dtfft_interface_nvtx Interface
nvtxDomainRangePop_c dtfft_interface_nvtx Interface
nvtxDomainRangePushEx_c dtfft_interface_nvtx Interface
operator(/=) dtfft_parameters Interface
operator(==) dtfft_parameters Interface
pencil_c2f dtfft_pencil Subroutine

Converts C pencil to Fortran pencil

pencil_f2c dtfft_pencil Subroutine

Converts Fortran pencil to C pencil

platform_eq dtfft_parameters Function
platform_ne dtfft_parameters Function
pop_nvtx_domain_range dtfft_interface_nvtx Subroutine

Pops a range from the NVTX domain

precision_eq dtfft_parameters Function
precision_ne dtfft_parameters Function
push_nvtx_domain_range dtfft_interface_nvtx Subroutine

Pushes a range to the NVTX domain

r2r_kind_eq dtfft_parameters Function
r2r_kind_ne dtfft_parameters Function
remove dtfft_nvrtc_kernel_cache Subroutine

Takes a CUDA kernel as an argument and searches for it in the cache. If the kernel is found, then reduces the ref_count of that entry, and kernel becomes a null pointer.

report dtfft_plan Subroutine

Prints plan-related information to stdout

run_autotune_backend dtfft_transpose_plan_cuda Subroutine

Runs autotune for all backends

run_mpi_a2a dtfft_backend_mpi Subroutine

Executes MPI all-to-all communication

run_mpi_p2p dtfft_backend_mpi Subroutine

Executes MPI point-to-point communication

set_unpack_kernel dtfft_abstract_backend Subroutine

Sets unpack kernel for pipelined backend

sort_by_varying_dim dtfft_pencil Subroutine
sort_candidates_by_score dtfft_nvrtc_block_optimizer Subroutine

Sorting candidates by their performance scores

stream_from_int64 dtfft_parameters Function

Creates dtfft_stream_t from integer(cuda_stream_kind)

string dtfft_utils Interface

Creates string object

string_c2f dtfft_utils Subroutine

Convert C string to Fortran string

string_constructor dtfft_utils Function

Creates string object

string_f2c dtfft_utils Subroutine

Convert Fortran string to C string

to_cstr dtfft_nvrtc_kernel_generator Subroutine

Converts Fortran CUDA code to C pointer

to_str dtfft_utils Interface

Convert various types to string

transpose dtfft_plan Subroutine

Performs single transposition

Read more…
transpose_ptr dtfft_plan Subroutine

Performs single transposition using type(c_ptr) pointers instead of buffers

Read more…
transpose_type_eq dtfft_parameters Function
transpose_type_ne dtfft_parameters Function
unload_library dtfft_utils Subroutine

Unloads library

write_message dtfft_utils Subroutine

Write message to the specified unit

call~~graph~~CallGraph interface~aligned_alloc aligned_alloc interface~comm_f2c Comm_f2c interface~cudadevicesynchronize cudaDeviceSynchronize interface~cudaeventcreate cudaEventCreate interface~cudaeventcreatewithflags cudaEventCreateWithFlags interface~cudaeventdestroy cudaEventDestroy interface~cudaeventelapsedtime cudaEventElapsedTime interface~cudaeventrecord cudaEventRecord interface~cudaeventsynchronize cudaEventSynchronize interface~cudafree cudaFree interface~cudagetdevice cudaGetDevice interface~cudagetdevicecount cudaGetDeviceCount interface~cudageterrorstring_c cudaGetErrorString_c interface~cudagetlasterror cudaGetLastError interface~cudamalloc cudaMalloc interface~cudamemcpy cudaMemcpy interface~cudamemcpyasync cudaMemcpyAsync interface~cudamemgetinfo cudaMemGetInfo interface~cudamemset cudaMemset interface~cudasetdevice cudaSetDevice interface~cudastreamcreate cudaStreamCreate interface~cudastreamdestroy cudaStreamDestroy interface~cudastreamquery cudaStreamQuery interface~cudastreamsynchronize cudaStreamSynchronize interface~cudastreamwaitevent cudaStreamWaitEvent interface~cufftdestroy cufftDestroy interface~cufftmpattachreshapecomm cufftMpAttachReshapeComm interface~cufftmpcreatereshape cufftMpCreateReshape interface~cufftmpdestroyreshape cufftMpDestroyReshape interface~cufftmpexecreshapeasync cufftMpExecReshapeAsync interface~cufftmpgetreshapesize cufftMpGetReshapeSize interface~cufftmpmakereshape cufftMpMakeReshape interface~cufftplanmany cufftPlanMany interface~cufftsetstream cufftSetStream interface~cufftxtexec cufftXtExec interface~dftierrormessage_c DftiErrorMessage_c interface~dlclose dlclose interface~dlerror dlerror interface~dlopen dlopen interface~dlsym dlsym interface~dtfft_config_t dtfft_config_t proc~config_constructor config_constructor interface~dtfft_config_t->proc~config_constructor interface~dtfft_get_version dtfft_get_version proc~dtfft_get_version_current dtfft_get_version_current 
interface~dtfft_get_version->proc~dtfft_get_version_current proc~dtfft_get_version_required dtfft_get_version_required interface~dtfft_get_version->proc~dtfft_get_version_required interface~dtfft_pencil_t dtfft_pencil_t proc~create_pencil_t create_pencil_t interface~dtfft_pencil_t->proc~create_pencil_t interface~dtfft_stream_t dtfft_stream_t proc~stream_from_int64 stream_from_int64 interface~dtfft_stream_t->proc~stream_from_int64 interface~fftw_execute_dft fftw_execute_dft interface~fftw_execute_dft_c2r fftw_execute_dft_c2r interface~fftw_execute_dft_r2c fftw_execute_dft_r2c interface~fftw_execute_r2r fftw_execute_r2r interface~fftw_plan_many_dft fftw_plan_many_dft interface~fftw_plan_many_dft_c2r fftw_plan_many_dft_c2r interface~fftw_plan_many_dft_r2c fftw_plan_many_dft_r2c interface~fftw_plan_many_r2r fftw_plan_many_r2r interface~fftwf_execute_dft fftwf_execute_dft interface~fftwf_execute_dft_c2r fftwf_execute_dft_c2r interface~fftwf_execute_dft_r2c fftwf_execute_dft_r2c interface~fftwf_execute_r2r fftwf_execute_r2r interface~fftwf_plan_many_dft fftwf_plan_many_dft interface~fftwf_plan_many_dft_c2r fftwf_plan_many_dft_c2r interface~fftwf_plan_many_dft_r2c fftwf_plan_many_dft_r2c interface~fftwf_plan_many_r2r fftwf_plan_many_r2r interface~get_conf_internal get_conf_internal proc~get_conf_internal_int32 get_conf_internal_int32 interface~get_conf_internal->proc~get_conf_internal_int32 proc~get_conf_internal_logical get_conf_internal_logical interface~get_conf_internal->proc~get_conf_internal_logical interface~get_device_props get_device_props interface~get_env get_env proc~get_env_base get_env_base interface~get_env->proc~get_env_base proc~get_env_int32 get_env_int32 interface~get_env->proc~get_env_int32 proc~get_env_int8 get_env_int8 interface~get_env->proc~get_env_int8 proc~get_env_logical get_env_logical interface~get_env->proc~get_env_logical proc~get_env_string get_env_string interface~get_env->proc~get_env_string interface~is_device_ptr is_device_ptr 
interface~is_null_ptr is_null_ptr interface~is_null_ptr->interface~is_null_ptr proc~is_null_funptr is_null_funptr interface~is_null_ptr->proc~is_null_funptr interface~mem_free_host mem_free_host interface~mkl_dfti_commit_desc mkl_dfti_commit_desc interface~mkl_dfti_create_desc mkl_dfti_create_desc interface~mkl_dfti_execute mkl_dfti_execute interface~mkl_dfti_free_desc mkl_dfti_free_desc interface~mkl_dfti_mem_alloc mkl_dfti_mem_alloc interface~mkl_dfti_mem_free mkl_dfti_mem_free interface~mkl_dfti_set_value mkl_dfti_set_value interface~ncclcommderegister ncclCommDeregister interface~ncclcommdestroy ncclCommDestroy interface~ncclcomminitrank ncclCommInitRank interface~ncclcommregister ncclCommRegister interface~ncclgeterrorstring_c ncclGetErrorString_c interface~ncclgetuniqueid ncclGetUniqueId interface~ncclgroupend ncclGroupEnd interface~ncclgroupstart ncclGroupStart interface~ncclmemalloc ncclMemAlloc interface~ncclmemfree ncclMemFree interface~ncclrecv ncclRecv interface~ncclsend ncclSend interface~nvshmem_free nvshmem_free interface~nvshmem_malloc nvshmem_malloc interface~nvshmem_my_pe nvshmem_my_pe interface~nvshmem_ptr nvshmem_ptr interface~nvshmemx_float_alltoall_on_stream nvshmemx_float_alltoall_on_stream interface~nvshmemx_init_status nvshmemx_init_status interface~nvshmemx_sync_all_on_stream nvshmemx_sync_all_on_stream interface~nvtxdomaincreate_c nvtxDomainCreate_c interface~nvtxdomainrangepop_c nvtxDomainRangePop_c interface~nvtxdomainrangepushex_c nvtxDomainRangePushEx_c interface~operator(==) operator(==) proc~effort_eq effort_eq interface~operator(==)->proc~effort_eq proc~execute_type_eq execute_type_eq interface~operator(==)->proc~execute_type_eq proc~executor_eq executor_eq interface~operator(==)->proc~executor_eq proc~gpu_backend_eq gpu_backend_eq interface~operator(==)->proc~gpu_backend_eq proc~kernel_type_eq kernel_type_eq interface~operator(==)->proc~kernel_type_eq proc~platform_eq platform_eq interface~operator(==)->proc~platform_eq 
proc~precision_eq precision_eq interface~operator(==)->proc~precision_eq proc~r2r_kind_eq r2r_kind_eq interface~operator(==)->proc~r2r_kind_eq proc~transpose_type_eq transpose_type_eq interface~operator(==)->proc~transpose_type_eq interface~operator(SLASH=) operator(/=) proc~effort_ne effort_ne interface~operator(SLASH=)->proc~effort_ne proc~execute_type_ne execute_type_ne interface~operator(SLASH=)->proc~execute_type_ne proc~executor_ne executor_ne interface~operator(SLASH=)->proc~executor_ne proc~gpu_backend_ne gpu_backend_ne interface~operator(SLASH=)->proc~gpu_backend_ne proc~kernel_type_ne kernel_type_ne interface~operator(SLASH=)->proc~kernel_type_ne proc~platform_ne platform_ne interface~operator(SLASH=)->proc~platform_ne proc~precision_ne precision_ne interface~operator(SLASH=)->proc~precision_ne proc~r2r_kind_ne r2r_kind_ne interface~operator(SLASH=)->proc~r2r_kind_ne proc~transpose_type_ne transpose_type_ne interface~operator(SLASH=)->proc~transpose_type_ne interface~string string proc~string_constructor string_constructor interface~string->proc~string_constructor interface~to_str to_str proc~double_to_string double_to_string interface~to_str->proc~double_to_string proc~float_to_string float_to_string interface~to_str->proc~float_to_string proc~int32_to_string int32_to_string interface~to_str->proc~int32_to_string proc~int64_to_string int64_to_string interface~to_str->proc~int64_to_string proc~int8_to_string int8_to_string interface~to_str->proc~int8_to_string none~create~14 dtfft_plan_c2c_t%create proc~create_c2c dtfft_plan_c2c_t%create_c2c none~create~14->proc~create_c2c proc~create_c2c_pencil dtfft_plan_c2c_t%create_c2c_pencil none~create~14->proc~create_c2c_pencil none~create~15 dtfft_plan_r2c_t%create proc~create_r2c dtfft_plan_r2c_t%create_r2c none~create~15->proc~create_r2c proc~create_r2c_pencil dtfft_plan_r2c_t%create_r2c_pencil none~create~15->proc~create_r2c_pencil none~create~16 dtfft_plan_r2r_t%create proc~create_r2r 
dtfft_plan_r2r_t%create_r2r none~create~16->proc~create_r2r proc~create_r2r_pencil dtfft_plan_r2r_t%create_r2r_pencil none~create~16->proc~create_r2r_pencil none~get_stream dtfft_plan_t%get_stream proc~get_stream_int64 dtfft_plan_t%get_stream_int64 none~get_stream->proc~get_stream_int64 proc~get_stream_ptr dtfft_plan_t%get_stream_ptr none~get_stream->proc~get_stream_ptr none~get_stream~2 dtfft_core_c2c%get_stream none~get_stream~2->proc~get_stream_int64 none~get_stream~2->proc~get_stream_ptr none~get_stream~3 dtfft_plan_r2r_t%get_stream none~get_stream~3->proc~get_stream_int64 none~get_stream~3->proc~get_stream_ptr none~get_stream~4 dtfft_plan_c2c_t%get_stream none~get_stream~4->proc~get_stream_int64 none~get_stream~4->proc~get_stream_ptr none~get_stream~5 dtfft_plan_r2c_t%get_stream none~get_stream~5->proc~get_stream_int64 none~get_stream~5->proc~get_stream_ptr none~mem_alloc~10 dtfft_plan_c2c_t%mem_alloc proc~mem_alloc_c32_1d dtfft_plan_t%mem_alloc_c32_1d none~mem_alloc~10->proc~mem_alloc_c32_1d proc~mem_alloc_c32_2d dtfft_plan_t%mem_alloc_c32_2d none~mem_alloc~10->proc~mem_alloc_c32_2d proc~mem_alloc_c32_3d dtfft_plan_t%mem_alloc_c32_3d none~mem_alloc~10->proc~mem_alloc_c32_3d proc~mem_alloc_c64_1d dtfft_plan_t%mem_alloc_c64_1d none~mem_alloc~10->proc~mem_alloc_c64_1d proc~mem_alloc_c64_2d dtfft_plan_t%mem_alloc_c64_2d none~mem_alloc~10->proc~mem_alloc_c64_2d proc~mem_alloc_c64_3d dtfft_plan_t%mem_alloc_c64_3d none~mem_alloc~10->proc~mem_alloc_c64_3d proc~mem_alloc_r32_1d dtfft_plan_t%mem_alloc_r32_1d none~mem_alloc~10->proc~mem_alloc_r32_1d proc~mem_alloc_r32_2d dtfft_plan_t%mem_alloc_r32_2d none~mem_alloc~10->proc~mem_alloc_r32_2d proc~mem_alloc_r32_3d dtfft_plan_t%mem_alloc_r32_3d none~mem_alloc~10->proc~mem_alloc_r32_3d proc~mem_alloc_r64_1d dtfft_plan_t%mem_alloc_r64_1d none~mem_alloc~10->proc~mem_alloc_r64_1d proc~mem_alloc_r64_2d dtfft_plan_t%mem_alloc_r64_2d none~mem_alloc~10->proc~mem_alloc_r64_2d proc~mem_alloc_r64_3d dtfft_plan_t%mem_alloc_r64_3d 
none~mem_alloc~10->proc~mem_alloc_r64_3d none~mem_alloc~11 dtfft_plan_r2c_t%mem_alloc none~mem_alloc~11->proc~mem_alloc_c32_1d none~mem_alloc~11->proc~mem_alloc_c32_2d none~mem_alloc~11->proc~mem_alloc_c32_3d none~mem_alloc~11->proc~mem_alloc_c64_1d none~mem_alloc~11->proc~mem_alloc_c64_2d none~mem_alloc~11->proc~mem_alloc_c64_3d none~mem_alloc~11->proc~mem_alloc_r32_1d none~mem_alloc~11->proc~mem_alloc_r32_2d none~mem_alloc~11->proc~mem_alloc_r32_3d none~mem_alloc~11->proc~mem_alloc_r64_1d none~mem_alloc~11->proc~mem_alloc_r64_2d none~mem_alloc~11->proc~mem_alloc_r64_3d none~mem_alloc~7 dtfft_plan_t%mem_alloc none~mem_alloc~7->proc~mem_alloc_c32_1d none~mem_alloc~7->proc~mem_alloc_c32_2d none~mem_alloc~7->proc~mem_alloc_c32_3d none~mem_alloc~7->proc~mem_alloc_c64_1d none~mem_alloc~7->proc~mem_alloc_c64_2d none~mem_alloc~7->proc~mem_alloc_c64_3d none~mem_alloc~7->proc~mem_alloc_r32_1d none~mem_alloc~7->proc~mem_alloc_r32_2d none~mem_alloc~7->proc~mem_alloc_r32_3d none~mem_alloc~7->proc~mem_alloc_r64_1d none~mem_alloc~7->proc~mem_alloc_r64_2d none~mem_alloc~7->proc~mem_alloc_r64_3d none~mem_alloc~8 dtfft_core_c2c%mem_alloc none~mem_alloc~8->proc~mem_alloc_c32_1d none~mem_alloc~8->proc~mem_alloc_c32_2d none~mem_alloc~8->proc~mem_alloc_c32_3d none~mem_alloc~8->proc~mem_alloc_c64_1d none~mem_alloc~8->proc~mem_alloc_c64_2d none~mem_alloc~8->proc~mem_alloc_c64_3d none~mem_alloc~8->proc~mem_alloc_r32_1d none~mem_alloc~8->proc~mem_alloc_r32_2d none~mem_alloc~8->proc~mem_alloc_r32_3d none~mem_alloc~8->proc~mem_alloc_r64_1d none~mem_alloc~8->proc~mem_alloc_r64_2d none~mem_alloc~8->proc~mem_alloc_r64_3d none~mem_alloc~9 dtfft_plan_r2r_t%mem_alloc none~mem_alloc~9->proc~mem_alloc_c32_1d none~mem_alloc~9->proc~mem_alloc_c32_2d none~mem_alloc~9->proc~mem_alloc_c32_3d none~mem_alloc~9->proc~mem_alloc_c64_1d none~mem_alloc~9->proc~mem_alloc_c64_2d none~mem_alloc~9->proc~mem_alloc_c64_3d none~mem_alloc~9->proc~mem_alloc_r32_1d none~mem_alloc~9->proc~mem_alloc_r32_2d 
none~mem_alloc~9->proc~mem_alloc_r32_3d none~mem_alloc~9->proc~mem_alloc_r64_1d none~mem_alloc~9->proc~mem_alloc_r64_2d none~mem_alloc~9->proc~mem_alloc_r64_3d none~mem_free~10 dtfft_plan_r2c_t%mem_free proc~mem_free_c32_1d dtfft_plan_t%mem_free_c32_1d none~mem_free~10->proc~mem_free_c32_1d proc~mem_free_c32_2d dtfft_plan_t%mem_free_c32_2d none~mem_free~10->proc~mem_free_c32_2d proc~mem_free_c32_3d dtfft_plan_t%mem_free_c32_3d none~mem_free~10->proc~mem_free_c32_3d proc~mem_free_c64_1d dtfft_plan_t%mem_free_c64_1d none~mem_free~10->proc~mem_free_c64_1d proc~mem_free_c64_2d dtfft_plan_t%mem_free_c64_2d none~mem_free~10->proc~mem_free_c64_2d proc~mem_free_c64_3d dtfft_plan_t%mem_free_c64_3d none~mem_free~10->proc~mem_free_c64_3d proc~mem_free_r32_1d dtfft_plan_t%mem_free_r32_1d none~mem_free~10->proc~mem_free_r32_1d proc~mem_free_r32_2d dtfft_plan_t%mem_free_r32_2d none~mem_free~10->proc~mem_free_r32_2d proc~mem_free_r32_3d dtfft_plan_t%mem_free_r32_3d none~mem_free~10->proc~mem_free_r32_3d proc~mem_free_r64_1d dtfft_plan_t%mem_free_r64_1d none~mem_free~10->proc~mem_free_r64_1d proc~mem_free_r64_2d dtfft_plan_t%mem_free_r64_2d none~mem_free~10->proc~mem_free_r64_2d proc~mem_free_r64_3d dtfft_plan_t%mem_free_r64_3d none~mem_free~10->proc~mem_free_r64_3d none~mem_free~11 dtfft_plan_c2c_t%mem_free none~mem_free~11->proc~mem_free_c32_1d none~mem_free~11->proc~mem_free_c32_2d none~mem_free~11->proc~mem_free_c32_3d none~mem_free~11->proc~mem_free_c64_1d none~mem_free~11->proc~mem_free_c64_2d none~mem_free~11->proc~mem_free_c64_3d none~mem_free~11->proc~mem_free_r32_1d none~mem_free~11->proc~mem_free_r32_2d none~mem_free~11->proc~mem_free_r32_3d none~mem_free~11->proc~mem_free_r64_1d none~mem_free~11->proc~mem_free_r64_2d none~mem_free~11->proc~mem_free_r64_3d none~mem_free~7 dtfft_plan_t%mem_free none~mem_free~7->proc~mem_free_c32_1d none~mem_free~7->proc~mem_free_c32_2d none~mem_free~7->proc~mem_free_c32_3d none~mem_free~7->proc~mem_free_c64_1d 
none~mem_free~7->proc~mem_free_c64_2d none~mem_free~7->proc~mem_free_c64_3d none~mem_free~7->proc~mem_free_r32_1d none~mem_free~7->proc~mem_free_r32_2d none~mem_free~7->proc~mem_free_r32_3d none~mem_free~7->proc~mem_free_r64_1d none~mem_free~7->proc~mem_free_r64_2d none~mem_free~7->proc~mem_free_r64_3d none~mem_free~8 dtfft_core_c2c%mem_free none~mem_free~8->proc~mem_free_c32_1d none~mem_free~8->proc~mem_free_c32_2d none~mem_free~8->proc~mem_free_c32_3d none~mem_free~8->proc~mem_free_c64_1d none~mem_free~8->proc~mem_free_c64_2d none~mem_free~8->proc~mem_free_c64_3d none~mem_free~8->proc~mem_free_r32_1d none~mem_free~8->proc~mem_free_r32_2d none~mem_free~8->proc~mem_free_r32_3d none~mem_free~8->proc~mem_free_r64_1d none~mem_free~8->proc~mem_free_r64_2d none~mem_free~8->proc~mem_free_r64_3d none~mem_free~9 dtfft_plan_r2r_t%mem_free none~mem_free~9->proc~mem_free_c32_1d none~mem_free~9->proc~mem_free_c32_2d none~mem_free~9->proc~mem_free_c32_3d none~mem_free~9->proc~mem_free_c64_1d none~mem_free~9->proc~mem_free_c64_2d none~mem_free~9->proc~mem_free_c64_3d none~mem_free~9->proc~mem_free_r32_1d none~mem_free~9->proc~mem_free_r32_2d none~mem_free~9->proc~mem_free_r32_3d none~mem_free~9->proc~mem_free_r64_1d none~mem_free~9->proc~mem_free_r64_2d none~mem_free~9->proc~mem_free_r64_3d proc~add nvrtc_cache%add proc~create~12 nvrtc_cache%create proc~add->proc~create~12 proc~get_true_transpose_type get_true_transpose_type proc~add->proc~get_true_transpose_type proc~add_line kernel_codegen%add_line proc~alloc_and_set_aux alloc_and_set_aux proc~alloc_mem alloc_mem proc~alloc_and_set_aux->proc~alloc_mem proc~dtfft_get_error_string dtfft_get_error_string proc~alloc_and_set_aux->proc~dtfft_get_error_string proc~get_aux_size~3 transpose_handle_cuda%get_aux_size proc~alloc_and_set_aux->proc~get_aux_size~3 mpi_abort mpi_abort proc~alloc_and_set_aux->mpi_abort mpi_allreduce mpi_allreduce proc~alloc_and_set_aux->mpi_allreduce proc~alloc_fft_plans dtfft_plan_t%alloc_fft_plans 
proc~alloc_mem->interface~cudamalloc proc~alloc_mem->interface~cudamemgetinfo proc~alloc_mem->interface~ncclcommregister proc~alloc_mem->interface~ncclmemalloc proc~alloc_mem->interface~nvshmem_malloc proc~alloc_mem->interface~to_str proc~cudageterrorstring cudaGetErrorString proc~alloc_mem->proc~cudageterrorstring proc~dtfft_get_backend_string dtfft_get_backend_string proc~alloc_mem->proc~dtfft_get_backend_string proc~get_conf_log_enabled get_conf_log_enabled proc~alloc_mem->proc~get_conf_log_enabled proc~is_backend_nccl is_backend_nccl proc~alloc_mem->proc~is_backend_nccl proc~is_backend_nvshmem is_backend_nvshmem proc~alloc_mem->proc~is_backend_nvshmem proc~ncclgeterrorstring ncclGetErrorString proc~alloc_mem->proc~ncclgeterrorstring proc~write_message write_message proc~alloc_mem->proc~write_message is_null_ptr is_null_ptr proc~alloc_mem->is_null_ptr proc~alloc_mem->mpi_abort proc~alloc_mem->mpi_allreduce temp temp proc~alloc_mem->temp proc~astring_f2c astring_f2c proc~string_f2c string_f2c proc~astring_f2c->proc~string_f2c proc~autotune_grid transpose_plan_host%autotune_grid proc~autotune_grid->interface~to_str proc~autotune_mpi_datatypes transpose_plan_host%autotune_mpi_datatypes proc~autotune_grid->proc~autotune_mpi_datatypes proc~create_pencils_and_comm create_pencils_and_comm proc~autotune_grid->proc~create_pencils_and_comm proc~destroy pencil%destroy proc~autotune_grid->proc~destroy proc~autotune_grid->proc~get_conf_log_enabled proc~get_local_sizes get_local_sizes proc~autotune_grid->proc~get_local_sizes proc~get_plan_execution_time transpose_plan_host%get_plan_execution_time proc~autotune_grid->proc~get_plan_execution_time proc~pop_nvtx_domain_range pop_nvtx_domain_range proc~autotune_grid->proc~pop_nvtx_domain_range proc~push_nvtx_domain_range push_nvtx_domain_range proc~autotune_grid->proc~push_nvtx_domain_range proc~autotune_grid->proc~write_message mpi_comm_free mpi_comm_free proc~autotune_grid->mpi_comm_free proc~autotune_grid_decomposition 
transpose_plan_host%autotune_grid_decomposition proc~autotune_grid_decomposition->interface~to_str proc~autotune_grid_decomposition->proc~autotune_grid proc~autotune_grid_decomposition->proc~get_conf_log_enabled proc~autotune_grid_decomposition->proc~write_message mpi_comm_size mpi_comm_size proc~autotune_grid_decomposition->mpi_comm_size proc~autotune_grid_decomposition~2 autotune_grid_decomposition proc~autotune_grid~2 autotune_grid proc~autotune_grid_decomposition~2->proc~autotune_grid~2 proc~autotune_grid_decomposition~2->mpi_comm_size proc~autotune_grid~2->interface~to_str proc~autotune_grid~2->proc~create_pencils_and_comm proc~autotune_grid~2->proc~destroy proc~autotune_grid~2->proc~get_conf_log_enabled proc~autotune_grid~2->proc~pop_nvtx_domain_range proc~autotune_grid~2->proc~push_nvtx_domain_range proc~run_autotune_backend run_autotune_backend proc~autotune_grid~2->proc~run_autotune_backend proc~autotune_grid~2->proc~write_message proc~autotune_grid~2->mpi_comm_free proc~autotune_transpose_id transpose_plan_host%autotune_transpose_id proc~autotune_mpi_datatypes->proc~autotune_transpose_id proc~autotune_transpose_id->proc~get_plan_execution_time proc~check_aux dtfft_plan_t%check_aux proc~check_aux->interface~to_str proc~check_aux->proc~dtfft_get_error_string proc~get_alloc_size dtfft_plan_t%get_alloc_size proc~check_aux->proc~get_alloc_size proc~check_aux->proc~get_conf_log_enabled proc~get_element_size dtfft_plan_t%get_element_size proc~check_aux->proc~get_element_size proc~mem_alloc_ptr dtfft_plan_t%mem_alloc_ptr proc~check_aux->proc~mem_alloc_ptr proc~check_aux->proc~write_message proc~check_aux->is_null_ptr proc~check_aux->mpi_abort proc~check_continuity check_continuity proc~check_create_args dtfft_plan_t%check_create_args proc~get_conf_platform get_conf_platform proc~check_create_args->proc~get_conf_platform proc~init_internal init_internal proc~check_create_args->proc~init_internal proc~is_cuda_executor is_cuda_executor 
proc~check_create_args->proc~is_cuda_executor proc~is_host_executor is_host_executor proc~check_create_args->proc~is_host_executor proc~is_valid_comm_type is_valid_comm_type proc~check_create_args->proc~is_valid_comm_type proc~is_valid_dimension is_valid_dimension proc~check_create_args->proc~is_valid_dimension proc~is_valid_effort is_valid_effort proc~check_create_args->proc~is_valid_effort proc~is_valid_executor is_valid_executor proc~check_create_args->proc~is_valid_executor proc~is_valid_precision is_valid_precision proc~check_create_args->proc~is_valid_precision proc~is_valid_r2r_kind is_valid_r2r_kind proc~check_create_args->proc~is_valid_r2r_kind mpi_topo_test mpi_topo_test proc~check_create_args->mpi_topo_test proc~check_device_pointers check_device_pointers proc~check_device_pointers->interface~is_device_ptr proc~check_device_pointers->proc~is_backend_nvshmem proc~is_nvshmem_ptr is_nvshmem_ptr proc~check_device_pointers->proc~is_nvshmem_ptr proc~check_device_pointers->is_null_ptr proc~check_if_even check_if_even mpi_allgather mpi_allgather proc~check_if_even->mpi_allgather proc~check_if_even->mpi_comm_size proc~check_overlap check_overlap proc~cleanup nvrtc_cache%cleanup proc~cleanup->interface~to_str proc~cleanup->proc~cudageterrorstring proc~cleanup->proc~get_conf_log_enabled proc~cleanup->proc~write_message proc~cleanup->is_null_ptr proc~cleanup->mpi_abort proc~compile_and_cache compile_and_cache proc~compile_and_cache->interface~to_str proc~compile_and_cache->proc~add proc~compile_and_cache->proc~astring_f2c proc~compile_and_cache->proc~cudageterrorstring proc~destroy_strings destroy_strings proc~compile_and_cache->proc~destroy_strings proc~get nvrtc_cache%get proc~compile_and_cache->proc~get proc~compile_and_cache->proc~get_conf_log_enabled proc~nvrtcgeterrorstring nvrtcGetErrorString proc~compile_and_cache->proc~nvrtcgeterrorstring proc~compile_and_cache->proc~pop_nvtx_domain_range proc~compile_and_cache->proc~push_nvtx_domain_range proc~string_c2f 
string_c2f proc~compile_and_cache->proc~string_c2f proc~to_cstr kernel_codegen%to_cstr proc~compile_and_cache->proc~to_cstr proc~compile_and_cache->proc~write_message proc~compile_and_cache->is_null_ptr proc~compile_and_cache->mpi_abort proc~count_bank_conflicts count_bank_conflicts proc~count_unique count_unique proc~create pencil%create proc~create->proc~check_if_even proc~create->proc~destroy proc~get_local_size get_local_size proc~create->proc~get_local_size proc~create_1d_comm create_1d_comm proc~get_varying_dim get_varying_dim proc~create_1d_comm->proc~get_varying_dim proc~sort_by_varying_dim sort_by_varying_dim proc~create_1d_comm->proc~sort_by_varying_dim mpi_comm_create mpi_comm_create proc~create_1d_comm->mpi_comm_create mpi_comm_group mpi_comm_group proc~create_1d_comm->mpi_comm_group proc~create_1d_comm->mpi_comm_size mpi_group_free mpi_group_free proc~create_1d_comm->mpi_group_free mpi_group_incl mpi_group_incl proc~create_1d_comm->mpi_group_incl proc~create_c2c_internal dtfft_plan_c2c_t%create_c2c_internal proc~create_c2c->proc~create_c2c_internal proc~create_c2c->proc~dtfft_get_error_string proc~create_c2c->proc~write_message proc~create_c2c_core dtfft_core_c2c%create_c2c_core proc~create_private~2 dtfft_plan_t%create_private proc~create_c2c_core->proc~create_private~2 create create proc~create_c2c_core->create fft_mapping fft_mapping proc~create_c2c_core->fft_mapping pencils pencils proc~create_c2c_core->pencils proc~create_c2c_internal->proc~create_c2c_core proc~create_c2c_internal->proc~pop_nvtx_domain_range proc~create_c2c_internal->proc~push_nvtx_domain_range proc~create_c2c_pencil->proc~create_c2c_internal proc~create_c2c_pencil->proc~dtfft_get_error_string proc~create_c2c_pencil->proc~write_message proc~create_cart_comm create_cart_comm mpi_cart_create mpi_cart_create proc~create_cart_comm->mpi_cart_create mpi_cart_sub mpi_cart_sub proc~create_cart_comm->mpi_cart_sub mpi_comm_dup mpi_comm_dup proc~create_cart_comm->mpi_comm_dup 
proc~create_cuda transpose_plan_cuda%create_cuda proc~create_cuda->interface~to_str proc~create_cuda->proc~alloc_and_set_aux proc~create_cuda->proc~autotune_grid_decomposition~2 proc~create_helper backend_helper%create_helper proc~create_cuda->proc~create_helper proc~create_cuda->proc~create_pencils_and_comm proc~create_cuda->proc~dtfft_get_backend_string proc~get_conf_backend get_conf_backend proc~create_cuda->proc~get_conf_backend proc~create_cuda->proc~get_conf_log_enabled proc~get_conf_mpi_enabled get_conf_mpi_enabled proc~create_cuda->proc~get_conf_mpi_enabled proc~get_conf_nccl_enabled get_conf_nccl_enabled proc~create_cuda->proc~get_conf_nccl_enabled proc~get_conf_nvshmem_enabled get_conf_nvshmem_enabled proc~create_cuda->proc~get_conf_nvshmem_enabled proc~get_conf_stream get_conf_stream proc~create_cuda->proc~get_conf_stream proc~create_cuda->proc~is_backend_nccl proc~load_cuda load_cuda proc~create_cuda->proc~load_cuda proc~load_nvrtc load_nvrtc proc~create_cuda->proc~load_nvrtc proc~create_cuda->proc~run_autotune_backend proc~create_cuda->proc~write_message proc~create_cuda->mpi_comm_size mpi_wtime mpi_wtime proc~create_cuda->mpi_wtime proc~create_data_handle data_handle%create_data_handle proc~create_data_handle->mpi_allgather proc~create_device_pointer create_device_pointer proc~create_device_pointer->interface~cudamalloc proc~create_device_pointer->interface~cudamemcpy proc~create_device_pointer->interface~to_str proc~create_device_pointer->proc~cudageterrorstring proc~create_device_pointer->mpi_abort proc~create_handle handle_t%create_handle proc~destroy_handle handle_t%destroy_handle proc~create_handle->proc~destroy_handle proc~create_helper->interface~get_env proc~create_helper->interface~ncclcomminitrank proc~create_helper->interface~ncclgetuniqueid proc~create_helper->interface~to_str proc~destroy_helper backend_helper%destroy_helper proc~create_helper->proc~destroy_helper proc~create_helper->proc~ncclgeterrorstring proc~create_helper->mpi_abort 
proc~create_helper->mpi_allgather mpi_bcast mpi_bcast proc~create_helper->mpi_bcast mpi_comm_rank mpi_comm_rank proc~create_helper->mpi_comm_rank proc~create_helper->mpi_comm_size proc~create_helper~2 mpi_backend_helper%create_helper proc~create_mpi backend_mpi%create_mpi proc~create_mpi->proc~create_helper~2 proc~is_backend_mpi is_backend_mpi proc~create_mpi->proc~is_backend_mpi proc~create_nccl backend_nccl%create_nccl proc~create_nccl->proc~is_backend_nccl proc~create_nvtx_domain create_nvtx_domain proc~create_nvtx_domain->interface~nvtxdomaincreate_c proc~create_nvtx_domain->proc~astring_f2c proc~create_pencil_init pencil_init%create_pencil_init proc~create_pencil_init->proc~check_continuity proc~create_pencil_init->proc~check_overlap proc~create_pencil_init->proc~create_1d_comm proc~create_pencil_init->proc~dtfft_get_error_string proc~create_pencil_init->proc~write_message proc~create_pencil_init->mpi_allgather proc~create_pencil_init->mpi_allreduce proc~create_pencil_init->mpi_comm_rank proc~create_pencil_init->mpi_comm_size proc~destroy_pencil_t_private dtfft_pencil_t%destroy_pencil_t_private proc~create_pencil_t->proc~destroy_pencil_t_private proc~create_pencils_and_comm->proc~create proc~create_pencils_and_comm->proc~create_cart_comm lcounts lcounts proc~create_pencils_and_comm->lcounts lstarts lstarts proc~create_pencils_and_comm->lstarts proc~create_private transpose_plan_host%create_private proc~create_private->interface~to_str proc~create_private->proc~autotune_grid proc~create_private->proc~autotune_grid_decomposition proc~create_private->proc~create_pencils_and_comm proc~create~7 transpose_handle_host%create proc~create_private->proc~create~7 proc~create_private->proc~get_conf_log_enabled proc~get_datatype_from_env get_datatype_from_env proc~create_private->proc~get_datatype_from_env proc~create_private->proc~write_message back_ids back_ids proc~create_private->back_ids dummy_decomp dummy_decomp proc~create_private->dummy_decomp dummy_timer 
dummy_timer proc~create_private->dummy_timer forw_ids forw_ids proc~create_private->forw_ids proc~create_private->mpi_comm_size proc~create_private~2->interface~cudagetdevice proc~create_private~2->interface~cudagetdevicecount proc~create_private~2->interface~to_str proc~create_private~2->proc~alloc_fft_plans proc~create_private~2->proc~check_create_args proc~create_private~2->proc~count_unique proc~create_private~2->proc~cudageterrorstring proc~create_private~2->proc~destroy proc~create_private~2->proc~get_conf_backend proc~create_private~2->proc~get_conf_stream counts counts proc~create_private~2->counts proc~create_private~2->create fixed_dims fixed_dims proc~create_private~2->fixed_dims local_devices local_devices proc~create_private~2->local_devices proc~create_private~2->mpi_abort proc~create_private~2->mpi_allgather proc~create_private~2->mpi_comm_free proc~create_private~2->mpi_comm_rank proc~create_private~2->mpi_comm_size mpi_comm_split_type mpi_comm_split_type proc~create_private~2->mpi_comm_split_type proc~create_r2c_internal dtfft_plan_r2c_t%create_r2c_internal proc~create_r2c->proc~create_r2c_internal proc~create_r2c->proc~dtfft_get_error_string proc~create_r2c->proc~write_message proc~create_r2c_internal->proc~create proc~create_r2c_internal->proc~create_c2c_core proc~create_r2c_internal->proc~pop_nvtx_domain_range proc~create_r2c_internal->proc~push_nvtx_domain_range proc~create_r2c_internal->pencils proc~create_r2c_pencil->proc~create_r2c_internal proc~create_r2c_pencil->proc~dtfft_get_error_string proc~create_r2c_pencil->proc~write_message proc~create_r2r_internal dtfft_plan_r2r_t%create_r2r_internal proc~create_r2r->proc~create_r2r_internal proc~create_r2r->proc~dtfft_get_error_string proc~create_r2r->proc~write_message proc~create_r2r_internal->proc~create_private~2 proc~create_r2r_internal->proc~pop_nvtx_domain_range proc~create_r2r_internal->proc~push_nvtx_domain_range proc~create_r2r_internal->create proc~create_r2r_internal->fft_mapping 
proc~create_r2r_internal->pencils proc~create_r2r_pencil->proc~create_r2r_internal proc~create_r2r_pencil->proc~dtfft_get_error_string proc~create_r2r_pencil->proc~write_message proc~create_transpose_2d transpose_handle_host%create_transpose_2d proc~free_datatypes free_datatypes proc~create_transpose_2d->proc~free_datatypes mpi_type_commit mpi_type_commit proc~create_transpose_2d->mpi_type_commit mpi_type_contiguous mpi_type_contiguous proc~create_transpose_2d->mpi_type_contiguous mpi_type_create_resized mpi_type_create_resized proc~create_transpose_2d->mpi_type_create_resized mpi_type_vector mpi_type_vector proc~create_transpose_2d->mpi_type_vector proc~create_transpose_xy transpose_handle_host%create_transpose_XY proc~create_transpose_xy->proc~free_datatypes proc~create_transpose_xy->mpi_type_commit proc~create_transpose_xy->mpi_type_contiguous mpi_type_create_hvector mpi_type_create_hvector proc~create_transpose_xy->mpi_type_create_hvector proc~create_transpose_xy->mpi_type_create_resized proc~create_transpose_xy->mpi_type_vector proc~create_transpose_xz transpose_handle_host%create_transpose_XZ proc~create_transpose_xz->proc~free_datatypes proc~create_transpose_xz->mpi_type_commit proc~create_transpose_xz->mpi_type_contiguous proc~create_transpose_xz->mpi_type_create_hvector proc~create_transpose_xz->mpi_type_create_resized proc~create_transpose_xz->mpi_type_vector proc~create_transpose_yz transpose_handle_host%create_transpose_YZ proc~create_transpose_yz->proc~free_datatypes proc~create_transpose_yz->mpi_type_commit proc~create_transpose_yz->mpi_type_contiguous proc~create_transpose_yz->mpi_type_create_hvector proc~create_transpose_yz->mpi_type_create_resized proc~create_transpose_yz->mpi_type_vector proc~create_transpose_zx transpose_handle_host%create_transpose_ZX proc~create_transpose_zx->proc~free_datatypes proc~create_transpose_zx->mpi_type_commit proc~create_transpose_zx->mpi_type_contiguous proc~create_transpose_zx->mpi_type_create_hvector 
proc~create_transpose_zx->mpi_type_create_resized proc~create_transpose_zx->mpi_type_vector proc~create~10 abstract_backend%create proc~create~10->interface~cudaeventcreatewithflags proc~create~10->interface~cudastreamcreate proc~create~10->interface~to_str proc~create~10->proc~cudageterrorstring proc~create~10->proc~is_backend_mpi proc~is_backend_pipelined is_backend_pipelined proc~create~10->proc~is_backend_pipelined create_private create_private proc~create~10->create_private proc~create~10->mpi_abort proc~create~10->mpi_comm_rank proc~create~10->mpi_comm_size proc~create~11 cufft_executor%create proc~create~11->interface~cufftplanmany proc~create~11->interface~cufftsetstream proc~create~11->interface~to_str proc~cufftgeterrorstring cufftGetErrorString proc~create~11->proc~cufftgeterrorstring proc~create~11->proc~get_conf_stream proc~create~11->mpi_abort proc~create~13 backend_cufftmp%create proc~create~13->interface~comm_f2c proc~create~13->interface~cufftmpattachreshapecomm proc~create~13->interface~cufftmpcreatereshape proc~create~13->interface~cufftmpgetreshapesize proc~create~13->interface~cufftmpmakereshape proc~create~13->interface~to_str proc~create~13->proc~cufftgeterrorstring proc~create~13->mpi_abort proc~create~2 abstract_executor%create proc~create~2->proc~pop_nvtx_domain_range proc~create~2->proc~push_nvtx_domain_range proc~create~2->create_private proc~create~2->is_null_ptr proc~create~3 fftw_executor%create proc~get_inverse_kind get_inverse_kind proc~create~3->proc~get_inverse_kind constructor constructor proc~create~3->constructor constructor_inverse constructor_inverse proc~create~3->constructor_inverse inverse_kinds inverse_kinds proc~create~3->inverse_kinds knds knds proc~create~3->knds proc~create~4 nvrtc_kernel%create proc~create~4->interface~cudagetdevice proc~create~4->interface~get_device_props proc~create~4->interface~to_str proc~create~4->proc~create_device_pointer proc~create~4->proc~cudageterrorstring proc~destroy~4 
nvrtc_kernel%destroy proc~create~4->proc~destroy~4 proc~get_kernel get_kernel proc~create~4->proc~get_kernel proc~get_kernel_args get_kernel_args proc~create~4->proc~get_kernel_args proc~create~4->mpi_abort proc~create~5 vkfft_executor%create proc~create~5->proc~get_conf_platform proc~create~5->proc~get_conf_stream proc~load_vkfft load_vkfft proc~create~5->proc~load_vkfft proc~create~6 abstract_transpose_plan%create proc~create~6->interface~to_str proc~create~6->proc~get_conf_log_enabled proc~create~6->proc~get_conf_platform proc~get_conf_z_slab_enabled get_conf_z_slab_enabled proc~create~6->proc~get_conf_z_slab_enabled proc~create~6->proc~get_local_sizes proc~create~6->proc~write_message proc~create~6->create_private mpi_cart_get mpi_cart_get proc~create~6->mpi_cart_get mpi_cartdim_get mpi_cartdim_get proc~create~6->mpi_cartdim_get proc~create~6->mpi_comm_size mpi_dims_create mpi_dims_create proc~create~6->mpi_dims_create proc~create~6->mpi_topo_test temp_coords temp_coords proc~create~6->temp_coords temp_dims temp_dims proc~create~6->temp_dims temp_periods temp_periods proc~create~6->temp_periods proc~create~7->proc~create_handle proc~create~7->proc~create_transpose_2d proc~create~7->proc~create_transpose_xy proc~create~7->proc~create_transpose_xz proc~create~7->proc~create_transpose_yz proc~create~7->proc~create_transpose_zx proc~get_transpose_type get_transpose_type proc~create~7->proc~get_transpose_type proc~create~7->mpi_allgather proc~create~7->mpi_comm_size proc~create~8 mkl_executor%create proc~make_plan make_plan proc~create~8->proc~make_plan proc~create~9 transpose_handle_cuda%create proc~create~9->proc~create~4 proc~destroy_data_handle data_handle%destroy_data_handle proc~create~9->proc~destroy_data_handle proc~create~9->proc~get_transpose_type proc~is_backend_cufftmp is_backend_cufftmp proc~create~9->proc~is_backend_cufftmp proc~create~9->proc~is_backend_mpi proc~create~9->proc~is_backend_nccl proc~create~9->proc~is_backend_pipelined 
proc~set_unpack_kernel abstract_backend%set_unpack_kernel proc~create~9->proc~set_unpack_kernel proc~create~9->mpi_comm_rank proc~create~9->mpi_comm_size mpi_irecv mpi_irecv proc~create~9->mpi_irecv mpi_isend mpi_isend proc~create~9->mpi_isend mpi_wait mpi_wait proc~create~9->mpi_wait proc~cudageterrorstring->interface~cudageterrorstring_c proc~cudageterrorstring->proc~string_c2f proc~culaunchkernel cuLaunchKernel proc~destoy_helper mpi_backend_helper%destoy_helper mpi_request_free mpi_request_free proc~destoy_helper->mpi_request_free proc~destroy_code kernel_codegen%destroy_code proc~destroy_cuda transpose_plan_cuda%destroy_cuda proc~destroy_cuda->proc~cleanup proc~destroy~9 transpose_handle_cuda%destroy proc~destroy_cuda->proc~destroy~9 proc~mem_free~3 abstract_transpose_plan%mem_free proc~destroy_cuda->proc~mem_free~3 mpi_type_free mpi_type_free proc~destroy_handle->mpi_type_free proc~destroy_helper->interface~ncclcommdestroy proc~destroy_helper->interface~to_str proc~destroy_helper->proc~ncclgeterrorstring proc~destroy_helper->proc~write_message proc~destroy_helper->mpi_abort proc~destroy_mpi backend_mpi%destroy_mpi proc~destroy_mpi->proc~destoy_helper proc~destroy_nccl backend_nccl%destroy_nccl proc~destroy_pencil_init pencil_init%destroy_pencil_init proc~destroy_pencil_init->mpi_comm_free proc~destroy_pencil_t destroy_pencil_t proc~destroy_pencil_t->proc~destroy_pencil_t_private proc~destroy_stream destroy_stream proc~destroy_stream->interface~cudastreamdestroy proc~destroy_stream->interface~to_str proc~destroy_stream->proc~cudageterrorstring proc~destroy_stream->mpi_abort proc~destroy_string string%destroy_string proc~destroy_strings->proc~destroy_string proc~destroy~10 abstract_backend%destroy proc~destroy~10->interface~cudaeventdestroy proc~destroy~10->interface~cudastreamdestroy proc~destroy~10->interface~to_str proc~destroy~10->proc~cudageterrorstring destroy_private destroy_private proc~destroy~10->destroy_private proc~destroy~10->mpi_abort 
proc~destroy~11 cufft_executor%destroy proc~destroy~11->interface~cufftdestroy proc~destroy~11->interface~to_str proc~destroy~11->proc~cufftgeterrorstring proc~destroy~11->mpi_abort proc~destroy~12 dtfft_plan_t%destroy proc~destroy~12->proc~destroy_stream proc~destroy~12->proc~dtfft_get_error_string proc~mem_free_ptr dtfft_plan_t%mem_free_ptr proc~destroy~12->proc~mem_free_ptr proc~destroy~12->proc~pop_nvtx_domain_range proc~destroy~12->proc~push_nvtx_domain_range proc~destroy~12->proc~write_message destroy destroy proc~destroy~12->destroy proc~destroy~12->mpi_comm_free mpi_finalized mpi_finalized proc~destroy~12->mpi_finalized proc~destroy~13 backend_cufftmp%destroy proc~destroy~13->interface~cufftmpdestroyreshape proc~destroy~13->interface~to_str proc~destroy~13->proc~cufftgeterrorstring proc~destroy~13->mpi_abort proc~destroy~2 abstract_executor%destroy proc~destroy~2->destroy_private proc~destroy~3 fftw_executor%destroy proc~destroy~4->interface~cudafree proc~destroy~4->interface~to_str proc~destroy~4->proc~cudageterrorstring proc~remove nvrtc_cache%remove proc~destroy~4->proc~remove proc~destroy~4->mpi_abort proc~destroy~5 vkfft_executor%destroy proc~destroy~6 transpose_handle_host%destroy proc~destroy~6->proc~destroy_handle proc~destroy~6->mpi_request_free proc~destroy~7 transpose_plan_host%destroy proc~destroy~7->proc~destroy~6 proc~destroy~8 mkl_executor%destroy proc~destroy~8->interface~mkl_dfti_free_desc proc~destroy~8->interface~to_str proc~dftierrormessage DftiErrorMessage proc~destroy~8->proc~dftierrormessage proc~destroy~8->mpi_abort proc~destroy~9->proc~destroy~4 proc~dftierrormessage->interface~dftierrormessage_c proc~dftierrormessage->proc~string_c2f proc~dl_error dl_error proc~dl_error->interface~dlerror proc~dl_error->proc~string_c2f proc~dl_error->proc~write_message proc~dtfft_create_config dtfft_create_config proc~dtfft_create_plan_c2c_c dtfft_create_plan_c2c_c proc~get_comm get_comm proc~dtfft_create_plan_c2c_c->proc~get_comm 
proc~dtfft_create_plan_c2c_c->create proc~dtfft_create_plan_c2c_pencil_c dtfft_create_plan_c2c_pencil_c proc~dtfft_create_plan_c2c_pencil_c->proc~get_comm proc~pencil_c2f pencil_c2f proc~dtfft_create_plan_c2c_pencil_c->proc~pencil_c2f proc~dtfft_create_plan_c2c_pencil_c->create proc~dtfft_create_plan_r2r_c dtfft_create_plan_r2r_c proc~dtfft_create_plan_r2r_c->proc~get_comm proc~dtfft_create_plan_r2r_c->create proc~dtfft_create_plan_r2r_pencil_c dtfft_create_plan_r2r_pencil_c proc~dtfft_create_plan_r2r_pencil_c->proc~get_comm proc~dtfft_create_plan_r2r_pencil_c->proc~pencil_c2f proc~dtfft_create_plan_r2r_pencil_c->create proc~dtfft_destroy_c dtfft_destroy_c proc~dtfft_destroy_c->proc~destroy~12 proc~dtfft_destroy_c->is_null_ptr proc~dtfft_execute_c dtfft_execute_c proc~execute_ptr dtfft_plan_t%execute_ptr proc~dtfft_execute_c->proc~execute_ptr proc~dtfft_execute_c->is_null_ptr proc~dtfft_get_alloc_bytes_c dtfft_get_alloc_bytes_c proc~get_alloc_bytes dtfft_plan_t%get_alloc_bytes proc~dtfft_get_alloc_bytes_c->proc~get_alloc_bytes proc~dtfft_get_alloc_bytes_c->is_null_ptr proc~dtfft_get_alloc_size_c dtfft_get_alloc_size_c proc~dtfft_get_alloc_size_c->proc~get_alloc_size proc~dtfft_get_alloc_size_c->is_null_ptr proc~dtfft_get_backend_c dtfft_get_backend_c proc~get_backend~2 dtfft_plan_t%get_backend proc~dtfft_get_backend_c->proc~get_backend~2 proc~dtfft_get_backend_c->is_null_ptr proc~dtfft_get_backend_string_c dtfft_get_backend_string_c proc~dtfft_get_backend_string_c->proc~dtfft_get_backend_string proc~dtfft_get_backend_string_c->proc~string_f2c proc~dtfft_get_cuda_stream dtfft_get_cuda_stream proc~dtfft_get_dims_c dtfft_get_dims_c proc~get_dims dtfft_plan_t%get_dims proc~dtfft_get_dims_c->proc~get_dims proc~dtfft_get_dims_c->is_null_ptr proc~dtfft_get_element_size_c dtfft_get_element_size_c proc~dtfft_get_element_size_c->proc~get_element_size proc~dtfft_get_element_size_c->is_null_ptr proc~dtfft_get_error_string_c dtfft_get_error_string_c 
proc~dtfft_get_error_string_c->proc~dtfft_get_error_string proc~dtfft_get_error_string_c->proc~string_f2c proc~dtfft_get_executor_c dtfft_get_executor_c proc~get_executor dtfft_plan_t%get_executor proc~dtfft_get_executor_c->proc~get_executor proc~dtfft_get_executor_c->is_null_ptr proc~dtfft_get_executor_string dtfft_get_executor_string proc~dtfft_get_executor_string_c dtfft_get_executor_string_c proc~dtfft_get_executor_string_c->proc~dtfft_get_executor_string proc~dtfft_get_executor_string_c->proc~string_f2c proc~dtfft_get_local_sizes_c dtfft_get_local_sizes_c proc~get_local_sizes~2 dtfft_plan_t%get_local_sizes proc~dtfft_get_local_sizes_c->proc~get_local_sizes~2 proc~dtfft_get_local_sizes_c->is_null_ptr proc~dtfft_get_pencil_c dtfft_get_pencil_c proc~get_pencil dtfft_plan_t%get_pencil proc~dtfft_get_pencil_c->proc~get_pencil proc~pencil_f2c pencil_f2c proc~dtfft_get_pencil_c->proc~pencil_f2c proc~dtfft_get_pencil_c->is_null_ptr proc~dtfft_get_platform_c dtfft_get_platform_c proc~get_platform dtfft_plan_t%get_platform proc~dtfft_get_platform_c->proc~get_platform proc~dtfft_get_platform_c->is_null_ptr proc~dtfft_get_precision_c dtfft_get_precision_c proc~get_precision dtfft_plan_t%get_precision proc~dtfft_get_precision_c->proc~get_precision proc~dtfft_get_precision_c->is_null_ptr proc~dtfft_get_precision_string dtfft_get_precision_string proc~dtfft_get_precision_string_c dtfft_get_precision_string_c proc~dtfft_get_precision_string_c->proc~dtfft_get_precision_string proc~dtfft_get_precision_string_c->proc~string_f2c proc~dtfft_get_stream_c dtfft_get_stream_c proc~dtfft_get_stream_c->none~get_stream proc~dtfft_get_stream_c->is_null_ptr proc~dtfft_get_z_slab_enabled_c dtfft_get_z_slab_enabled_c proc~get_z_slab_enabled dtfft_plan_t%get_z_slab_enabled proc~dtfft_get_z_slab_enabled_c->proc~get_z_slab_enabled proc~dtfft_get_z_slab_enabled_c->is_null_ptr proc~dtfft_mem_alloc_c dtfft_mem_alloc_c proc~dtfft_mem_alloc_c->proc~mem_alloc_ptr proc~dtfft_mem_alloc_c->is_null_ptr 
proc~dtfft_mem_free_c dtfft_mem_free_c proc~dtfft_mem_free_c->proc~mem_free_ptr proc~dtfft_mem_free_c->is_null_ptr proc~dtfft_report_c dtfft_report_c proc~report dtfft_plan_t%report proc~dtfft_report_c->proc~report proc~dtfft_report_c->is_null_ptr proc~dtfft_set_config dtfft_set_config proc~dtfft_set_config->interface~cudastreamquery proc~is_valid_gpu_backend is_valid_gpu_backend proc~dtfft_set_config->proc~is_valid_gpu_backend proc~is_valid_platform is_valid_platform proc~dtfft_set_config->proc~is_valid_platform proc~dtfft_set_config->is_null_ptr proc~dtfft_set_config_c dtfft_set_config_c proc~dtfft_set_config_c->proc~dtfft_set_config proc~dtfft_transpose_c dtfft_transpose_c proc~transpose_ptr dtfft_plan_t%transpose_ptr proc~dtfft_transpose_c->proc~transpose_ptr proc~dtfft_transpose_c->is_null_ptr proc~dynamic_load dynamic_load proc~dynamic_load->interface~is_null_ptr proc~load_library load_library proc~dynamic_load->proc~load_library proc~load_symbol load_symbol proc~dynamic_load->proc~load_symbol proc~unload_library unload_library proc~dynamic_load->proc~unload_library proc~estimate_bank_conflict_ratio estimate_bank_conflict_ratio proc~estimate_bank_conflict_ratio->proc~count_bank_conflicts proc~estimate_coalescing estimate_coalescing proc~estimate_memory_pressure estimate_memory_pressure proc~estimate_occupancy estimate_occupancy proc~estimate_optimal_padding estimate_optimal_padding proc~estimate_optimal_padding->proc~count_bank_conflicts proc~evaluate_analytical_performance evaluate_analytical_performance proc~evaluate_analytical_performance->proc~count_bank_conflicts proc~evaluate_analytical_performance->proc~estimate_bank_conflict_ratio proc~evaluate_analytical_performance->proc~estimate_coalescing proc~evaluate_analytical_performance->proc~estimate_occupancy proc~execute abstract_executor%execute proc~execute->proc~pop_nvtx_domain_range proc~execute->proc~push_nvtx_domain_range execute_private execute_private proc~execute->execute_private proc~execute_cuda 
transpose_plan_cuda%execute_cuda proc~execute~8 transpose_handle_cuda%execute proc~execute_cuda->proc~execute~8 proc~execute_mpi backend_mpi%execute_mpi proc~execute_mpi->interface~cudastreamsynchronize proc~execute_mpi->interface~to_str proc~execute_mpi->proc~cudageterrorstring proc~execute~3 nvrtc_kernel%execute proc~execute_mpi->proc~execute~3 proc~run_mpi_a2a run_mpi_a2a proc~execute_mpi->proc~run_mpi_a2a proc~run_mpi_p2p run_mpi_p2p proc~execute_mpi->proc~run_mpi_p2p proc~execute_mpi->mpi_abort proc~execute_mpi->mpi_wait mpi_waitall mpi_waitall proc~execute_mpi->mpi_waitall proc~execute_nccl backend_nccl%execute_nccl proc~execute_nccl->interface~ncclgroupend proc~execute_nccl->interface~ncclgroupstart proc~execute_nccl->interface~ncclrecv proc~execute_nccl->interface~ncclsend proc~execute_nccl->interface~to_str proc~execute_nccl->proc~execute~3 proc~execute_nccl->proc~ncclgeterrorstring proc~execute_nccl->mpi_abort proc~execute_private transpose_plan_host%execute_private proc~execute~6 transpose_handle_host%execute proc~execute_private->proc~execute~6 proc~execute_private~2 dtfft_plan_t%execute_private proc~execute~5 abstract_transpose_plan%execute proc~execute_private~2->proc~execute~5 proc~execute_ptr->proc~check_aux proc~execute_ptr->proc~check_device_pointers proc~execute_ptr->proc~dtfft_get_error_string proc~execute_ptr->proc~execute_private~2 proc~get_backend abstract_transpose_plan%get_backend proc~execute_ptr->proc~get_backend proc~is_same_ptr is_same_ptr proc~execute_ptr->proc~is_same_ptr proc~is_valid_execute_type is_valid_execute_type proc~execute_ptr->proc~is_valid_execute_type proc~execute_ptr->proc~pop_nvtx_domain_range proc~execute_ptr->proc~push_nvtx_domain_range proc~execute_ptr->proc~write_message proc~execute_ptr->is_null_ptr proc~execute~10 cufft_executor%execute proc~execute~10->interface~cufftxtexec proc~execute~10->interface~to_str proc~execute~10->proc~cufftgeterrorstring proc~execute~10->mpi_abort proc~execute~11 dtfft_plan_t%execute 
proc~execute~11->proc~execute_ptr proc~execute~12 backend_cufftmp%execute proc~execute~12->interface~cudastreamsynchronize proc~execute~12->interface~cufftmpexecreshapeasync proc~execute~12->interface~to_str proc~execute~12->proc~cudageterrorstring proc~execute~12->proc~cufftgeterrorstring proc~execute~12->mpi_abort mpi_barrier mpi_barrier proc~execute~12->mpi_barrier proc~execute~2 fftw_executor%execute proc~execute~3->interface~cudamemcpyasync proc~execute~3->interface~cudastreamsynchronize proc~execute~3->interface~to_str proc~execute~3->proc~cudageterrorstring proc~execute~3->proc~culaunchkernel proc~get_contiguous_execution_blocks get_contiguous_execution_blocks proc~execute~3->proc~get_contiguous_execution_blocks proc~execute~3->mpi_abort proc~execute~4 vkfft_executor%execute proc~execute~5->proc~pop_nvtx_domain_range proc~execute~5->proc~push_nvtx_domain_range proc~execute~5->execute_private mpi_alltoall_init mpi_alltoall_init proc~execute~6->mpi_alltoall_init mpi_alltoallw_init mpi_alltoallw_init proc~execute~6->mpi_alltoallw_init mpi_start mpi_start proc~execute~6->mpi_start proc~execute~6->mpi_wait proc~execute~7 mkl_executor%execute proc~execute~7->interface~mkl_dfti_commit_desc proc~execute~7->interface~mkl_dfti_execute proc~execute~7->interface~mkl_dfti_set_value proc~execute~7->interface~to_str proc~execute~7->proc~dftierrormessage proc~execute~7->mpi_abort proc~execute~8->proc~execute~3 proc~execute~9 abstract_backend%execute proc~execute~9->interface~cudaeventrecord proc~execute~9->interface~cudamemcpyasync proc~execute~9->interface~cudastreamsynchronize proc~execute~9->interface~cudastreamwaitevent proc~execute~9->interface~to_str proc~execute~9->proc~cudageterrorstring proc~execute~9->proc~execute~3 proc~execute~9->execute_private proc~execute~9->mpi_abort proc~find_valid_combination find_valid_combination proc~free_datatypes->mpi_type_free proc~free_mem free_mem proc~free_mem->interface~cudafree proc~free_mem->interface~ncclcommderegister 
proc~free_mem->interface~ncclmemfree proc~free_mem->interface~to_str proc~free_mem->proc~get_conf_log_enabled proc~free_mem->proc~is_backend_nccl proc~free_mem->proc~is_backend_nvshmem proc~free_mem->proc~is_same_ptr proc~free_mem->proc~ncclgeterrorstring proc~free_mem->proc~write_message proc~free_mem->mpi_abort nvshmem_free nvshmem_free proc~free_mem->nvshmem_free proc~generate_candidates generate_candidates proc~generate_candidates->proc~estimate_memory_pressure proc~generate_candidates->proc~estimate_optimal_padding proc~generate_candidates->proc~find_valid_combination proc~get->proc~get_true_transpose_type proc~is_unpack_kernel is_unpack_kernel proc~get->proc~is_unpack_kernel proc~get_alloc_bytes->proc~dtfft_get_error_string proc~get_alloc_bytes->proc~get_alloc_size proc~get_alloc_bytes->proc~get_element_size proc~get_alloc_bytes->proc~write_message proc~get_alloc_size->proc~get_local_sizes~2 proc~get_ampere_architecture get_ampere_architecture proc~get_aux_size abstract_transpose_plan%get_aux_size proc~get_aux_size~2 transpose_plan_cuda%get_aux_size proc~get_aux_size~2->proc~get_aux_size~3 proc~get_aux_size~4 abstract_backend%get_aux_size proc~get_aux_size~3->proc~get_aux_size~4 proc~get_backend~2->proc~get_backend proc~get_code_init get_code_init proc~get_code_init->proc~add_line proc~get_conf_configs_to_test get_conf_configs_to_test proc~get_conf_configs_to_test->interface~get_conf_internal proc~get_conf_forced_kernel_optimization get_conf_forced_kernel_optimization proc~get_conf_forced_kernel_optimization->interface~get_conf_internal proc~get_conf_kernel_optimization_enabled get_conf_kernel_optimization_enabled proc~get_conf_kernel_optimization_enabled->interface~get_conf_internal proc~get_conf_log_enabled->interface~get_conf_internal proc~get_conf_measure_iters get_conf_measure_iters proc~get_conf_measure_iters->interface~get_conf_internal proc~get_conf_measure_warmup_iters get_conf_measure_warmup_iters 
proc~get_conf_measure_warmup_iters->interface~get_conf_internal proc~get_conf_mpi_enabled->interface~get_conf_internal proc~get_conf_nccl_enabled->interface~get_conf_internal proc~get_conf_nvshmem_enabled->interface~get_conf_internal proc~get_conf_pipelined_enabled get_conf_pipelined_enabled proc~get_conf_pipelined_enabled->interface~get_conf_internal proc~get_conf_stream->interface~cudastreamcreate proc~get_conf_stream->interface~to_str proc~get_conf_stream->proc~cudageterrorstring proc~get_conf_stream->mpi_abort proc~get_conf_z_slab_enabled->interface~get_conf_internal proc~get_datatype_from_env->interface~get_env proc~get_dims->proc~dtfft_get_error_string proc~get_dims->proc~write_message proc~get_element_size->proc~dtfft_get_error_string proc~get_element_size->proc~write_message proc~get_env_base->proc~destroy_string proc~get_env_int32->interface~get_env proc~get_env_int32->proc~write_message proc~get_env_int8->interface~get_env proc~get_env_logical->interface~get_env proc~get_env_string->interface~get_env proc~get_env_string->proc~write_message proc~get_executor->proc~dtfft_get_error_string proc~get_executor->proc~write_message proc~get_kernel->interface~cudaeventcreate proc~get_kernel->interface~cudaeventdestroy proc~get_kernel->interface~cudaeventelapsedtime proc~get_kernel->interface~cudaeventrecord proc~get_kernel->interface~cudaeventsynchronize proc~get_kernel->interface~cudafree proc~get_kernel->interface~cudamalloc proc~get_kernel->interface~cudastreamsynchronize proc~get_kernel->interface~to_str proc~get_kernel->proc~compile_and_cache proc~get_kernel->proc~cudageterrorstring proc~get_kernel->proc~culaunchkernel proc~get_kernel->proc~destroy_code proc~get_kernel->proc~evaluate_analytical_performance proc~get_kernel->proc~generate_candidates proc~get_kernel->proc~get_conf_configs_to_test proc~get_kernel->proc~get_conf_forced_kernel_optimization proc~get_kernel->proc~get_conf_kernel_optimization_enabled proc~get_kernel->proc~get_conf_log_enabled 
proc~get_kernel->proc~get_conf_measure_iters proc~get_kernel->proc~get_conf_measure_warmup_iters proc~get_kernel->proc~get_conf_stream proc~get_kernel->proc~get_contiguous_execution_blocks proc~get_kernel->proc~get_kernel_args proc~get_transpose_kernel get_transpose_kernel proc~get_kernel->proc~get_transpose_kernel proc~get_unpack_kernel_code get_unpack_kernel_code proc~get_kernel->proc~get_unpack_kernel_code proc~get_unpack_pipelined_kernel_code get_unpack_pipelined_kernel_code proc~get_kernel->proc~get_unpack_pipelined_kernel_code proc~get_kernel->proc~is_unpack_kernel proc~get_kernel->proc~pop_nvtx_domain_range proc~get_kernel->proc~push_nvtx_domain_range proc~get_kernel->proc~remove proc~sort_candidates_by_score sort_candidates_by_score proc~get_kernel->proc~sort_candidates_by_score proc~get_kernel->proc~write_message proc~get_kernel->mpi_abort proc~get_kernel_args->mpi_comm_rank proc~get_kernel_args->mpi_comm_size proc~get_local_size->mpi_allgather proc~get_local_size->mpi_comm_rank proc~get_local_size->mpi_comm_size proc~get_local_sizes~2->proc~dtfft_get_error_string proc~get_local_sizes~2->proc~get_aux_size proc~get_local_sizes~2->proc~get_backend proc~get_local_sizes~2->proc~get_local_sizes proc~get_local_sizes~2->proc~is_backend_nvshmem proc~get_local_sizes~2->proc~write_message proc~get_local_sizes~2->counts proc~get_local_sizes~2->mpi_allreduce starts starts proc~get_local_sizes~2->starts proc~get_neighbor_function_code get_neighbor_function_code proc~get_neighbor_function_code->proc~add_line proc~get_pencil->proc~dtfft_get_error_string proc~get_pencil->proc~write_message make_public make_public proc~get_pencil->make_public proc~get_plan_execution_time->interface~to_str proc~get_plan_execution_time->proc~create~7 proc~get_plan_execution_time->proc~destroy~6 proc~get_plan_execution_time->proc~execute~6 proc~get_plan_execution_time->proc~get_conf_log_enabled proc~get_plan_execution_time->proc~get_conf_measure_iters 
proc~get_plan_execution_time->proc~get_conf_measure_warmup_iters proc~get_plan_execution_time->proc~pop_nvtx_domain_range proc~get_plan_execution_time->proc~push_nvtx_domain_range proc~get_plan_execution_time->proc~write_message proc~get_plan_execution_time->mpi_allreduce proc~get_plan_execution_time->mpi_comm_size proc~get_plan_execution_time->mpi_wtime proc~get_precision->proc~dtfft_get_error_string proc~get_precision->proc~write_message proc~get_stream_int64->none~get_stream proc~get_stream_int64->proc~dtfft_get_cuda_stream proc~get_stream_ptr->proc~dtfft_get_error_string proc~get_stream_ptr->proc~write_message proc~get_transpose_kernel->proc~compile_and_cache proc~get_transpose_kernel->proc~destroy_code proc~get_transpose_kernel_code get_transpose_kernel_code proc~get_transpose_kernel->proc~get_transpose_kernel_code proc~get_transpose_kernel_code->interface~to_str proc~get_transpose_kernel_code->proc~add_line proc~get_transpose_kernel_code->proc~get_code_init proc~get_transpose_kernel_code->proc~get_neighbor_function_code proc~get_unpack_kernel_code->proc~add_line proc~get_unpack_kernel_code->proc~get_code_init proc~get_unpack_kernel_code->proc~get_neighbor_function_code proc~get_unpack_pipelined_kernel_code->proc~add_line proc~get_unpack_pipelined_kernel_code->proc~get_code_init proc~get_volta_architecture get_volta_architecture proc~init_environment init_environment proc~init_environment->interface~get_env proc~init_environment->proc~destroy_strings backends backends proc~init_environment->backends platforms platforms proc~init_environment->platforms proc~init_internal->proc~init_environment mpi_initialized mpi_initialized proc~init_internal->mpi_initialized proc~is_null_ptr is_null_ptr proc~is_nvshmem_ptr->interface~nvshmem_my_pe proc~is_nvshmem_ptr->interface~nvshmem_ptr proc~is_nvshmem_ptr->is_null_ptr proc~is_transpose_kernel is_transpose_kernel proc~is_valid_transpose_type is_valid_transpose_type proc~load load proc~load->proc~destroy_strings 
proc~load->proc~dynamic_load proc~load_cuda->proc~destroy_strings proc~load_cuda->proc~dynamic_load proc~load_library->interface~dlopen proc~load_library->interface~is_null_ptr proc~load_library->proc~astring_f2c proc~load_library->proc~dl_error proc~load_nvrtc->proc~destroy_strings proc~load_nvrtc->proc~dynamic_load proc~load_symbol->interface~dlsym proc~load_symbol->interface~is_null_ptr proc~load_symbol->proc~astring_f2c proc~load_symbol->proc~dl_error proc~load_vkfft->proc~load proc~make_plan->interface~mkl_dfti_commit_desc proc~make_plan->interface~mkl_dfti_create_desc proc~make_plan->interface~mkl_dfti_set_value proc~make_plan->interface~to_str proc~make_plan->proc~dftierrormessage proc~make_plan->mpi_abort proc~make_public pencil%make_public proc~mem_alloc fftw_executor%mem_alloc fftw_malloc fftw_malloc proc~mem_alloc->fftw_malloc proc~mem_alloc_c32_1d->proc~mem_alloc_ptr proc~mem_alloc_c32_2d->proc~mem_alloc_ptr proc~mem_alloc_c32_3d->proc~mem_alloc_ptr proc~mem_alloc_c64_1d->proc~mem_alloc_ptr proc~mem_alloc_c64_2d->proc~mem_alloc_ptr proc~mem_alloc_c64_3d->proc~mem_alloc_ptr proc~mem_alloc_host mem_alloc_host proc~mem_alloc_host->interface~aligned_alloc proc~mem_alloc_ptr->proc~dtfft_get_error_string proc~mem_alloc_ptr->proc~mem_alloc_host proc~mem_alloc_ptr->proc~write_message proc~mem_alloc_ptr->is_null_ptr mem_alloc mem_alloc proc~mem_alloc_ptr->mem_alloc proc~mem_alloc_r32_1d->proc~mem_alloc_ptr proc~mem_alloc_r32_2d->proc~mem_alloc_ptr proc~mem_alloc_r32_3d->proc~mem_alloc_ptr proc~mem_alloc_r64_1d->proc~mem_alloc_ptr proc~mem_alloc_r64_2d->proc~mem_alloc_ptr proc~mem_alloc_r64_3d->proc~mem_alloc_ptr proc~mem_alloc~2 vkfft_executor%mem_alloc proc~mem_alloc~3 abstract_transpose_plan%mem_alloc proc~mem_alloc~3->proc~alloc_mem proc~mem_alloc~4 mkl_executor%mem_alloc proc~mem_alloc~4->interface~mkl_dfti_mem_alloc proc~mem_alloc~4->interface~to_str proc~mem_alloc~4->proc~dftierrormessage proc~mem_alloc~4->mpi_abort proc~mem_alloc~5 
cufft_executor%mem_alloc proc~mem_free fftw_executor%mem_free fftw_free fftw_free proc~mem_free->fftw_free proc~mem_free_c32_1d->proc~mem_free_ptr proc~mem_free_c32_2d->proc~mem_free_ptr proc~mem_free_c32_3d->proc~mem_free_ptr proc~mem_free_c64_1d->proc~mem_free_ptr proc~mem_free_c64_2d->proc~mem_free_ptr proc~mem_free_c64_3d->proc~mem_free_ptr proc~mem_free_ptr->interface~mem_free_host proc~mem_free_ptr->proc~dtfft_get_error_string proc~mem_free_ptr->proc~write_message mem_free mem_free proc~mem_free_ptr->mem_free proc~mem_free_r32_1d->proc~mem_free_ptr proc~mem_free_r32_2d->proc~mem_free_ptr proc~mem_free_r32_3d->proc~mem_free_ptr proc~mem_free_r64_1d->proc~mem_free_ptr proc~mem_free_r64_2d->proc~mem_free_ptr proc~mem_free_r64_3d->proc~mem_free_ptr proc~mem_free~2 vkfft_executor%mem_free proc~mem_free~3->proc~free_mem proc~mem_free~4 mkl_executor%mem_free proc~mem_free~4->interface~mkl_dfti_mem_free proc~mem_free~4->interface~to_str proc~mem_free~4->proc~dftierrormessage proc~mem_free~4->mpi_abort proc~mem_free~5 cufft_executor%mem_free proc~ncclgeterrorstring->interface~ncclgeterrorstring_c proc~ncclgeterrorstring->proc~string_c2f proc~nvrtcgeterrorstring->proc~string_c2f proc~pop_nvtx_domain_range->interface~nvtxdomainrangepop_c proc~push_nvtx_domain_range->interface~nvtxdomainrangepushex_c proc~push_nvtx_domain_range->proc~astring_f2c proc~push_nvtx_domain_range->proc~create_nvtx_domain proc~remove->proc~is_same_ptr proc~remove->is_null_ptr proc~report->interface~to_str proc~report->proc~dtfft_get_backend_string proc~report->proc~dtfft_get_error_string proc~report->proc~dtfft_get_executor_string proc~report->proc~dtfft_get_precision_string proc~report->proc~get_backend proc~report->proc~write_message proc~report->mpi_comm_size proc~run_autotune_backend->interface~cudaeventcreate proc~run_autotune_backend->interface~cudaeventdestroy proc~run_autotune_backend->interface~cudaeventelapsedtime proc~run_autotune_backend->interface~cudaeventrecord 
proc~run_autotune_backend->interface~cudaeventsynchronize proc~run_autotune_backend->interface~cudastreamsynchronize proc~run_autotune_backend->interface~to_str proc~run_autotune_backend->proc~alloc_and_set_aux proc~run_autotune_backend->proc~alloc_mem proc~run_autotune_backend->proc~create_helper proc~run_autotune_backend->proc~cudageterrorstring proc~run_autotune_backend->proc~destroy~9 proc~run_autotune_backend->proc~dtfft_get_backend_string proc~run_autotune_backend->proc~dtfft_get_error_string proc~run_autotune_backend->proc~execute~8 proc~run_autotune_backend->proc~free_mem proc~run_autotune_backend->proc~get_conf_log_enabled proc~run_autotune_backend->proc~get_conf_measure_iters proc~run_autotune_backend->proc~get_conf_measure_warmup_iters proc~run_autotune_backend->proc~get_conf_mpi_enabled proc~run_autotune_backend->proc~get_conf_nvshmem_enabled proc~run_autotune_backend->proc~get_conf_pipelined_enabled proc~run_autotune_backend->proc~get_local_sizes proc~run_autotune_backend->proc~is_backend_mpi proc~run_autotune_backend->proc~is_backend_nccl proc~run_autotune_backend->proc~is_backend_nvshmem proc~run_autotune_backend->proc~is_backend_pipelined proc~run_autotune_backend->proc~pop_nvtx_domain_range proc~run_autotune_backend->proc~push_nvtx_domain_range proc~run_autotune_backend->proc~write_message proc~run_autotune_backend->mpi_abort proc~run_autotune_backend->mpi_allreduce proc~run_autotune_backend->mpi_barrier proc~run_autotune_backend->mpi_comm_size mpi_alltoallv_init mpi_alltoallv_init proc~run_mpi_a2a->mpi_alltoallv_init proc~run_mpi_a2a->mpi_start proc~run_mpi_p2p->mpi_comm_size mpi_recv_init mpi_recv_init proc~run_mpi_p2p->mpi_recv_init mpi_send_init mpi_send_init proc~run_mpi_p2p->mpi_send_init mpi_startall mpi_startall proc~run_mpi_p2p->mpi_startall proc~to_cstr->proc~astring_f2c proc~transpose dtfft_plan_t%transpose proc~transpose->proc~transpose_ptr proc~transpose_ptr->proc~check_device_pointers proc~transpose_ptr->proc~dtfft_get_error_string 
proc~transpose_ptr->proc~execute~5 proc~transpose_ptr->proc~get_backend proc~transpose_ptr->proc~is_same_ptr proc~transpose_ptr->proc~is_valid_transpose_type proc~transpose_ptr->proc~pop_nvtx_domain_range proc~transpose_ptr->proc~push_nvtx_domain_range proc~transpose_ptr->proc~write_message proc~unload_library->interface~dlclose proc~unload_library->proc~dl_error proc~write_message->mpi_comm_rank proc~write_message->mpi_finalized
Help