create_cuda Function

private function create_cuda(self, dims, transposed_dims, base_comm, comm_dims, effort, base_dtype, base_storage, is_custom_cart_comm, cart_comm, comms, pencils)

Creates a CUDA transpose plan

Type Bound

transpose_plan_cuda

Arguments

Type Intent Optional Attributes Name
class(transpose_plan_cuda), intent(inout) :: self

GPU transpose plan

integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

integer(kind=int32), intent(in) :: transposed_dims(:,:)

Transposed dimensions

type(MPI_Comm), intent(in) :: base_comm

Base communicator

integer(kind=int32), intent(in) :: comm_dims(:)

Number of processors in each dimension

type(dtfft_effort_t), intent(in) :: effort

How thoroughly dtFFT searches for the optimal plan

type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store a single element

logical, intent(in) :: is_custom_cart_comm

Whether a custom Cartesian communicator was provided by the user

type(MPI_Comm), intent(out) :: cart_comm

Cartesian communicator

type(MPI_Comm), intent(out) :: comms(:)

Array of 1d communicators

type(pencil), intent(out) :: pencils(:)

Data distribution metadata

Return Value integer(kind=int32)


Calls

proc~~create_cuda~~CallsGraph proc~create_cuda transpose_plan_cuda%create_cuda interface~int_to_str int_to_str proc~create_cuda->interface~int_to_str mpi_comm_size mpi_comm_size proc~create_cuda->mpi_comm_size mpi_wtime mpi_wtime proc~create_cuda->mpi_wtime proc~alloc_and_set_aux alloc_and_set_aux proc~create_cuda->proc~alloc_and_set_aux proc~autotune_grid_decomposition autotune_grid_decomposition proc~create_cuda->proc~autotune_grid_decomposition proc~clean_unused_cache clean_unused_cache proc~create_cuda->proc~clean_unused_cache proc~create_cart_comm create_cart_comm proc~create_cuda->proc~create_cart_comm proc~create~8 pencil%create proc~create_cuda->proc~create~8 proc~double_to_str double_to_str proc~create_cuda->proc~double_to_str proc~dtfft_get_backend_string dtfft_get_backend_string proc~create_cuda->proc~dtfft_get_backend_string proc~get_log_enabled get_log_enabled proc~create_cuda->proc~get_log_enabled proc~get_mpi_enabled get_mpi_enabled proc~create_cuda->proc~get_mpi_enabled proc~get_nccl_enabled get_nccl_enabled proc~create_cuda->proc~get_nccl_enabled proc~get_nvshmem_enabled get_nvshmem_enabled proc~create_cuda->proc~get_nvshmem_enabled proc~get_user_gpu_backend get_user_gpu_backend proc~create_cuda->proc~get_user_gpu_backend proc~get_user_stream get_user_stream proc~create_cuda->proc~get_user_stream proc~is_backend_nccl is_backend_nccl proc~create_cuda->proc~is_backend_nccl proc~load_cuda load_cuda proc~create_cuda->proc~load_cuda proc~load_nvrtc load_nvrtc proc~create_cuda->proc~load_nvrtc proc~run_autotune_backend run_autotune_backend proc~create_cuda->proc~run_autotune_backend proc~write_message write_message proc~create_cuda->proc~write_message proc~int_to_str_int32 int_to_str_int32 interface~int_to_str->proc~int_to_str_int32 proc~int_to_str_int64 int_to_str_int64 interface~int_to_str->proc~int_to_str_int64 proc~int_to_str_int8 int_to_str_int8 interface~int_to_str->proc~int_to_str_int8 mpi_abort mpi_abort proc~alloc_and_set_aux->mpi_abort 
mpi_allreduce mpi_allreduce proc~alloc_and_set_aux->mpi_allreduce proc~alloc_mem alloc_mem proc~alloc_and_set_aux->proc~alloc_mem proc~dtfft_get_error_string dtfft_get_error_string proc~alloc_and_set_aux->proc~dtfft_get_error_string proc~get_aux_size~2 transpose_handle_cuda%get_aux_size proc~alloc_and_set_aux->proc~get_aux_size~2 proc~autotune_grid_decomposition->mpi_comm_size proc~autotune_grid autotune_grid proc~autotune_grid_decomposition->proc~autotune_grid proc~clean_unused_cache->interface~int_to_str is_null_ptr is_null_ptr proc~clean_unused_cache->is_null_ptr proc~clean_unused_cache->mpi_abort proc~cudageterrorstring cudaGetErrorString proc~clean_unused_cache->proc~cudageterrorstring mpi_cart_create mpi_cart_create proc~create_cart_comm->mpi_cart_create mpi_cart_sub mpi_cart_sub proc~create_cart_comm->mpi_cart_sub proc~destroy~8 pencil%destroy proc~create~8->proc~destroy~8 proc~get_local_size get_local_size proc~create~8->proc~get_local_size proc~get_mpi_enabled_from_env get_mpi_enabled_from_env proc~get_mpi_enabled->proc~get_mpi_enabled_from_env proc~get_nccl_enabled_from_env get_nccl_enabled_from_env proc~get_nccl_enabled->proc~get_nccl_enabled_from_env proc~get_nvshmem_enabled_from_env get_nvshmem_enabled_from_env proc~get_nvshmem_enabled->proc~get_nvshmem_enabled_from_env proc~get_backend_from_env get_backend_from_env proc~get_user_gpu_backend->proc~get_backend_from_env proc~get_user_stream->interface~int_to_str interface~cudastreamcreate cudaStreamCreate proc~get_user_stream->interface~cudastreamcreate proc~get_user_stream->mpi_abort proc~get_user_stream->proc~cudageterrorstring proc~destroy_strings destroy_strings proc~load_cuda->proc~destroy_strings proc~dynamic_load dynamic_load proc~load_cuda->proc~dynamic_load proc~load_nvrtc->proc~destroy_strings proc~load_nvrtc->proc~dynamic_load proc~run_autotune_backend->interface~int_to_str proc~run_autotune_backend->mpi_comm_size proc~run_autotune_backend->proc~alloc_and_set_aux 
proc~run_autotune_backend->proc~double_to_str proc~run_autotune_backend->proc~dtfft_get_backend_string proc~run_autotune_backend->proc~get_log_enabled proc~run_autotune_backend->proc~get_mpi_enabled proc~run_autotune_backend->proc~get_nvshmem_enabled proc~run_autotune_backend->proc~is_backend_nccl proc~run_autotune_backend->proc~write_message interface~cudaeventcreate cudaEventCreate proc~run_autotune_backend->interface~cudaeventcreate interface~cudaeventdestroy cudaEventDestroy proc~run_autotune_backend->interface~cudaeventdestroy interface~cudaeventelapsedtime cudaEventElapsedTime proc~run_autotune_backend->interface~cudaeventelapsedtime interface~cudaeventrecord cudaEventRecord proc~run_autotune_backend->interface~cudaeventrecord interface~cudaeventsynchronize cudaEventSynchronize proc~run_autotune_backend->interface~cudaeventsynchronize interface~cudastreamsynchronize cudaStreamSynchronize proc~run_autotune_backend->interface~cudastreamsynchronize proc~run_autotune_backend->mpi_abort proc~run_autotune_backend->mpi_allreduce mpi_barrier mpi_barrier proc~run_autotune_backend->mpi_barrier proc~run_autotune_backend->proc~alloc_mem proc~create_helper backend_helper%create_helper proc~run_autotune_backend->proc~create_helper proc~run_autotune_backend->proc~cudageterrorstring proc~destroy~7 transpose_handle_cuda%destroy proc~run_autotune_backend->proc~destroy~7 proc~run_autotune_backend->proc~dtfft_get_error_string proc~execute~8 transpose_handle_cuda%execute proc~run_autotune_backend->proc~execute~8 proc~free_mem free_mem proc~run_autotune_backend->proc~free_mem proc~get_iters_from_env get_iters_from_env proc~run_autotune_backend->proc~get_iters_from_env proc~get_local_sizes~2 get_local_sizes proc~run_autotune_backend->proc~get_local_sizes~2 proc~get_pipelined_enabled get_pipelined_enabled proc~run_autotune_backend->proc~get_pipelined_enabled proc~is_backend_mpi is_backend_mpi proc~run_autotune_backend->proc~is_backend_mpi proc~is_backend_nvshmem is_backend_nvshmem 
proc~run_autotune_backend->proc~is_backend_nvshmem proc~is_backend_pipelined is_backend_pipelined proc~run_autotune_backend->proc~is_backend_pipelined proc~pop_nvtx_domain_range pop_nvtx_domain_range proc~run_autotune_backend->proc~pop_nvtx_domain_range proc~push_nvtx_domain_range push_nvtx_domain_range proc~run_autotune_backend->proc~push_nvtx_domain_range mpi_comm_rank mpi_comm_rank proc~write_message->mpi_comm_rank mpi_finalized mpi_finalized proc~write_message->mpi_finalized proc~alloc_mem->interface~int_to_str proc~alloc_mem->proc~dtfft_get_backend_string proc~alloc_mem->proc~get_log_enabled proc~alloc_mem->proc~is_backend_nccl proc~alloc_mem->proc~write_message proc~alloc_mem->is_null_ptr proc~alloc_mem->mpi_abort proc~alloc_mem->mpi_allreduce proc~alloc_mem->proc~cudageterrorstring proc~alloc_mem->proc~is_backend_nvshmem interface~cudamalloc cudaMalloc proc~alloc_mem->interface~cudamalloc interface~cudamemgetinfo cudaMemGetInfo proc~alloc_mem->interface~cudamemgetinfo interface~ncclcommregister ncclCommRegister proc~alloc_mem->interface~ncclcommregister interface~ncclmemalloc ncclMemAlloc proc~alloc_mem->interface~ncclmemalloc interface~nvshmem_malloc nvshmem_malloc proc~alloc_mem->interface~nvshmem_malloc proc~ncclgeterrorstring ncclGetErrorString proc~alloc_mem->proc~ncclgeterrorstring temp temp proc~alloc_mem->temp proc~autotune_grid->interface~int_to_str proc~autotune_grid->proc~create_cart_comm proc~autotune_grid->proc~create~8 proc~autotune_grid->proc~get_log_enabled proc~autotune_grid->proc~run_autotune_backend proc~autotune_grid->proc~write_message proc~autotune_grid->proc~destroy~8 proc~autotune_grid->proc~pop_nvtx_domain_range proc~autotune_grid->proc~push_nvtx_domain_range mpi_comm_free mpi_comm_free proc~autotune_grid->mpi_comm_free proc~create_helper->interface~int_to_str proc~create_helper->mpi_comm_size proc~create_helper->mpi_abort proc~create_helper->mpi_comm_rank interface~get_env get_env proc~create_helper->interface~get_env 
interface~ncclcomminitrank ncclCommInitRank proc~create_helper->interface~ncclcomminitrank interface~ncclgetuniqueid ncclGetUniqueId proc~create_helper->interface~ncclgetuniqueid mpi_allgather mpi_allgather proc~create_helper->mpi_allgather mpi_bcast mpi_bcast proc~create_helper->mpi_bcast proc~destroy_helper backend_helper%destroy_helper proc~create_helper->proc~destroy_helper proc~create_helper->proc~ncclgeterrorstring interface~cudageterrorstring_c cudaGetErrorString_c proc~cudageterrorstring->interface~cudageterrorstring_c proc~string_c2f string_c2f proc~cudageterrorstring->proc~string_c2f proc~destroy~3 nvrtc_kernel%destroy proc~destroy~7->proc~destroy~3 interface~is_null_ptr is_null_ptr proc~dynamic_load->interface~is_null_ptr proc~load_library load_library proc~dynamic_load->proc~load_library proc~load_symbol load_symbol proc~dynamic_load->proc~load_symbol proc~unload_library unload_library proc~dynamic_load->proc~unload_library proc~execute~3 nvrtc_kernel%execute proc~execute~8->proc~execute~3 proc~free_mem->interface~int_to_str proc~free_mem->proc~is_backend_nccl proc~free_mem->mpi_abort proc~free_mem->proc~is_backend_nvshmem interface~cudafree cudaFree proc~free_mem->interface~cudafree interface~ncclcommderegister ncclCommDeregister proc~free_mem->interface~ncclcommderegister interface~ncclmemfree ncclMemFree proc~free_mem->interface~ncclmemfree interface~nvshmem_free nvshmem_free proc~free_mem->interface~nvshmem_free proc~is_same_ptr is_same_ptr proc~free_mem->proc~is_same_ptr proc~free_mem->proc~ncclgeterrorstring proc~get_aux_size abstract_backend%get_aux_size proc~get_aux_size~2->proc~get_aux_size proc~get_iters_from_env->interface~get_env proc~get_local_size->mpi_comm_size proc~get_local_size->mpi_comm_rank proc~get_local_size->mpi_allgather proc~get_pipe_enabled_from_env get_pipe_enabled_from_env proc~get_pipelined_enabled->proc~get_pipe_enabled_from_env interface~nvtxdomainrangepop_c nvtxDomainRangePop_c 
proc~pop_nvtx_domain_range->interface~nvtxdomainrangepop_c interface~nvtxdomainrangepushex_c nvtxDomainRangePushEx_c proc~push_nvtx_domain_range->interface~nvtxdomainrangepushex_c proc~astring_f2c astring_f2c proc~push_nvtx_domain_range->proc~astring_f2c proc~create_nvtx_domain create_nvtx_domain proc~push_nvtx_domain_range->proc~create_nvtx_domain proc~get_env_base get_env_base interface~get_env->proc~get_env_base proc~get_env_int32 get_env_int32 interface~get_env->proc~get_env_int32 proc~get_env_int8 get_env_int8 interface~get_env->proc~get_env_int8 proc~get_env_logical get_env_logical interface~get_env->proc~get_env_logical proc~get_env_string get_env_string interface~get_env->proc~get_env_string interface~is_null_ptr->interface~is_null_ptr proc~is_null_funptr is_null_funptr interface~is_null_ptr->proc~is_null_funptr proc~string_f2c string_f2c proc~astring_f2c->proc~string_f2c proc~create_nvtx_domain->proc~astring_f2c interface~nvtxdomaincreate_c nvtxDomainCreate_c proc~create_nvtx_domain->interface~nvtxdomaincreate_c proc~destroy_helper->interface~int_to_str proc~destroy_helper->proc~get_log_enabled proc~destroy_helper->proc~write_message proc~destroy_helper->mpi_abort proc~destroy_helper->proc~ncclgeterrorstring interface~ncclcommdestroy ncclCommDestroy proc~destroy_helper->interface~ncclcommdestroy proc~destroy~3->interface~int_to_str proc~destroy~3->mpi_abort proc~destroy~3->proc~cudageterrorstring proc~destroy~3->interface~cudafree proc~mark_unused mark_unused proc~destroy~3->proc~mark_unused proc~execute~3->interface~int_to_str proc~execute~3->mpi_abort proc~execute~3->mpi_comm_rank proc~execute~3->proc~cudageterrorstring interface~cudamemcpyasync cudaMemcpyAsync proc~execute~3->interface~cudamemcpyasync proc~culaunchkernel cuLaunchKernel proc~execute~3->proc~culaunchkernel proc~get_contiguous_execution_blocks get_contiguous_execution_blocks proc~execute~3->proc~get_contiguous_execution_blocks proc~load_library->interface~is_null_ptr 
proc~load_library->proc~astring_f2c interface~dlopen dlopen proc~load_library->interface~dlopen proc~dl_error dl_error proc~load_library->proc~dl_error proc~load_symbol->interface~is_null_ptr proc~load_symbol->proc~astring_f2c interface~dlsym dlsym proc~load_symbol->interface~dlsym proc~load_symbol->proc~dl_error proc~ncclgeterrorstring->proc~string_c2f interface~ncclgeterrorstring_c ncclGetErrorString_c proc~ncclgeterrorstring->interface~ncclgeterrorstring_c interface~dlclose dlclose proc~unload_library->interface~dlclose proc~unload_library->proc~dl_error interface~run_cuda_kernel run_cuda_kernel proc~culaunchkernel->interface~run_cuda_kernel proc~dl_error->proc~get_log_enabled proc~dl_error->proc~write_message proc~dl_error->proc~string_c2f interface~dlerror dlerror proc~dl_error->interface~dlerror proc~get_env_int32->proc~get_log_enabled proc~get_env_int32->proc~write_message proc~get_env_int32->interface~get_env proc~get_env_int8->interface~get_env proc~get_env_logical->interface~get_env proc~get_env_string->proc~get_log_enabled proc~get_env_string->proc~write_message proc~get_env_string->interface~get_env proc~mark_unused->proc~is_same_ptr