autotune_grid_decomposition Subroutine

private subroutine autotune_grid_decomposition(platform, dims, base_comm, effort, transpose_mode, base_dtype, base_storage, stream, fmodes, bmodes, best_decomposition, backend, min_execution_time, best_backend, best_transpose_mode, fbacks, bbacks)

Runs through all possible grid decompositions and selects the one with the lowest average execution time.

Arguments

Type Intent Optional Attributes Name
type(dtfft_platform_t), intent(in) :: platform

Platform to use

integer(kind=int32), intent(in) :: dims(:)

Global sizes of the transform requested

type(MPI_Comm), intent(in) :: base_comm

3D communicator

type(dtfft_effort_t), intent(in) :: effort

Effort level for the plan creation

type(dtfft_transpose_mode_t), intent(in) :: transpose_mode

Transpose mode to use

type(MPI_Datatype), intent(in) :: base_dtype

Base MPI_Datatype

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store a single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

type(dtfft_transpose_mode_t), intent(inout) :: fmodes(:)

Best transpose modes for forward plan

type(dtfft_transpose_mode_t), intent(inout) :: bmodes(:)

Best transpose modes for backward plan

integer(kind=int32), intent(out) :: best_decomposition(:)

Best decomposition found

type(dtfft_backend_t), intent(in), optional :: backend

GPU Backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE

real(kind=real32), intent(out), optional :: min_execution_time

Elapsed time for the best plan selected

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected

type(dtfft_transpose_mode_t), intent(out), optional :: best_transpose_mode

Best transpose mode selected

type(dtfft_backend_t), intent(out), optional :: fbacks(:)

Best backends for forward plans

type(dtfft_backend_t), intent(out), optional :: bbacks(:)

Best backends for backward plans


Calls

proc~~autotune_grid_decomposition~~CallsGraph proc~autotune_grid_decomposition autotune_grid_decomposition mpi_comm_size mpi_comm_size proc~autotune_grid_decomposition->mpi_comm_size mpi_dims_create mpi_dims_create proc~autotune_grid_decomposition->mpi_dims_create proc~autotune_grid autotune_grid proc~autotune_grid_decomposition->proc~autotune_grid proc~get_conf_log_enabled get_conf_log_enabled proc~autotune_grid_decomposition->proc~get_conf_log_enabled proc~get_correct_backend get_correct_backend proc~autotune_grid_decomposition->proc~get_correct_backend proc~write_message write_message proc~autotune_grid_decomposition->proc~write_message proc~autotune_grid->proc~get_conf_log_enabled proc~autotune_grid->proc~write_message interface~to_str to_str proc~autotune_grid->interface~to_str mpi_comm_free mpi_comm_free proc~autotune_grid->mpi_comm_free proc~create_pencils_and_comm create_pencils_and_comm proc~autotune_grid->proc~create_pencils_and_comm proc~destroy~4 pencil%destroy proc~autotune_grid->proc~destroy~4 proc~get_permutations get_permutations proc~autotune_grid->proc~get_permutations proc~pop_nvtx_domain_range pop_nvtx_domain_range proc~autotune_grid->proc~pop_nvtx_domain_range proc~push_nvtx_domain_range push_nvtx_domain_range proc~autotune_grid->proc~push_nvtx_domain_range proc~run_autotune_backend run_autotune_backend proc~autotune_grid->proc~run_autotune_backend interface~get_conf_internal get_conf_internal proc~get_conf_log_enabled->interface~get_conf_internal proc~get_conf_platform get_conf_platform proc~get_correct_backend->proc~get_conf_platform mpi_comm_rank mpi_comm_rank proc~write_message->mpi_comm_rank mpi_finalized mpi_finalized proc~write_message->mpi_finalized proc~get_conf_internal_int32 get_conf_internal_int32 interface~get_conf_internal->proc~get_conf_internal_int32 proc~get_conf_internal_logical get_conf_internal_logical interface~get_conf_internal->proc~get_conf_internal_logical proc~double_to_string double_to_string 
interface~to_str->proc~double_to_string proc~float_to_string float_to_string interface~to_str->proc~float_to_string proc~int32_to_string int32_to_string interface~to_str->proc~int32_to_string proc~int64_to_string int64_to_string interface~to_str->proc~int64_to_string proc~int8_to_string int8_to_string interface~to_str->proc~int8_to_string proc~create_pencils_and_comm->proc~get_permutations proc~create_cart_comm create_cart_comm proc~create_pencils_and_comm->proc~create_cart_comm proc~create~4 pencil%create proc~create_pencils_and_comm->proc~create~4 interface~nvtxdomainrangepop_c nvtxDomainRangePop_c proc~pop_nvtx_domain_range->interface~nvtxdomainrangepop_c interface~nvtxdomainrangepushex_c nvtxDomainRangePushEx_c proc~push_nvtx_domain_range->interface~nvtxdomainrangepushex_c proc~astring_f2c astring_f2c proc~push_nvtx_domain_range->proc~astring_f2c proc~create_nvtx_domain create_nvtx_domain proc~push_nvtx_domain_range->proc~create_nvtx_domain proc~run_autotune_backend->proc~get_correct_backend lbmodes lbmodes proc~run_autotune_backend->lbmodes lbtimes lbtimes proc~run_autotune_backend->lbtimes lfmodes lfmodes proc~run_autotune_backend->lfmodes lftimes lftimes proc~run_autotune_backend->lftimes proc~create_helper~2 backend_helper%create_helper proc~run_autotune_backend->proc~create_helper~2 proc~destroy_helper~2 backend_helper%destroy_helper proc~run_autotune_backend->proc~destroy_helper~2 proc~execute_many execute_many proc~run_autotune_backend->proc~execute_many proc~execute_single execute_single proc~run_autotune_backend->proc~execute_single proc~get_conf_compression_enabled get_conf_compression_enabled proc~run_autotune_backend->proc~get_conf_compression_enabled proc~get_conf_datatype_enabled get_conf_datatype_enabled proc~run_autotune_backend->proc~get_conf_datatype_enabled proc~get_conf_fused_enabled get_conf_fused_enabled proc~run_autotune_backend->proc~get_conf_fused_enabled proc~get_conf_mpi_enabled get_conf_mpi_enabled 
proc~run_autotune_backend->proc~get_conf_mpi_enabled proc~get_conf_nccl_enabled get_conf_nccl_enabled proc~run_autotune_backend->proc~get_conf_nccl_enabled proc~get_conf_nvshmem_enabled get_conf_nvshmem_enabled proc~run_autotune_backend->proc~get_conf_nvshmem_enabled proc~get_conf_pipelined_enabled get_conf_pipelined_enabled proc~run_autotune_backend->proc~get_conf_pipelined_enabled proc~get_conf_rma_enabled get_conf_rma_enabled proc~run_autotune_backend->proc~get_conf_rma_enabled proc~get_conf_transpose get_conf_transpose proc~run_autotune_backend->proc~get_conf_transpose proc~is_backend_compressed is_backend_compressed proc~run_autotune_backend->proc~is_backend_compressed proc~is_backend_fused is_backend_fused proc~run_autotune_backend->proc~is_backend_fused proc~is_backend_mpi is_backend_mpi proc~run_autotune_backend->proc~is_backend_mpi proc~is_backend_nccl is_backend_nccl proc~run_autotune_backend->proc~is_backend_nccl proc~is_backend_nvshmem is_backend_nvshmem proc~run_autotune_backend->proc~is_backend_nvshmem proc~is_backend_pipelined is_backend_pipelined proc~run_autotune_backend->proc~is_backend_pipelined proc~is_backend_rma is_backend_rma proc~run_autotune_backend->proc~is_backend_rma proc~string_f2c string_f2c proc~astring_f2c->proc~string_f2c proc~create_cart_comm->proc~write_message proc~create_cart_comm->interface~to_str proc~create_cart_comm->mpi_comm_free mpi_abort mpi_abort proc~create_cart_comm->mpi_abort mpi_cart_create mpi_cart_create proc~create_cart_comm->mpi_cart_create mpi_cart_sub mpi_cart_sub proc~create_cart_comm->mpi_cart_sub mpi_comm_dup mpi_comm_dup proc~create_cart_comm->mpi_comm_dup proc~create_subcomm_include_all create_subcomm_include_all proc~create_cart_comm->proc~create_subcomm_include_all proc~create_helper~2->mpi_comm_size proc~create_helper~2->mpi_comm_rank proc~create_helper~2->proc~destroy_helper~2 fname fname proc~create_helper~2->fname interface~get_env get_env proc~create_helper~2->interface~get_env 
interface~ncclcomminitrank ncclCommInitRank proc~create_helper~2->interface~ncclcomminitrank interface~ncclgetuniqueid ncclGetUniqueId proc~create_helper~2->interface~ncclgetuniqueid proc~create_helper~2->mpi_abort mpi_allgather mpi_allgather proc~create_helper~2->mpi_allgather mpi_bcast mpi_bcast proc~create_helper~2->mpi_bcast proc~ncclgeterrorstring ncclGetErrorString proc~create_helper~2->proc~ncclgeterrorstring proc~create_nvtx_domain->proc~astring_f2c interface~nvtxdomaincreate_c nvtxDomainCreate_c proc~create_nvtx_domain->interface~nvtxdomaincreate_c proc~create~4->mpi_comm_size proc~create~4->proc~destroy~4 proc~create~4->mpi_allgather proc~check_if_even check_if_even proc~create~4->proc~check_if_even proc~get_local_size get_local_size proc~create~4->proc~get_local_size proc~destroy_helper~2->proc~write_message proc~destroy_helper~2->fname interface~ncclcommdestroy ncclCommDestroy proc~destroy_helper~2->interface~ncclcommdestroy proc~destroy_helper~2->mpi_abort proc~destroy_helper~2->proc~ncclgeterrorstring proc~execute_many->proc~get_conf_log_enabled proc~execute_many->proc~write_message proc~execute_many->interface~to_str proc~execute_many->proc~pop_nvtx_domain_range proc~execute_many->proc~push_nvtx_domain_range proc~execute_many->proc~execute_single btimes btimes proc~execute_many->btimes ftimes ftimes proc~execute_many->ftimes proc~allocate_plans allocate_plans proc~execute_many->proc~allocate_plans proc~create~18 abstract_reshape_handle%create proc~execute_many->proc~create~18 proc~destroy_plans destroy_plans proc~execute_many->proc~destroy_plans proc~dtfft_get_backend_string dtfft_get_backend_string proc~execute_many->proc~dtfft_get_backend_string proc~execute_autotune execute_autotune proc~execute_many->proc~execute_autotune proc~get_local_sizes get_local_sizes proc~execute_many->proc~get_local_sizes proc~execute_single->proc~get_conf_log_enabled proc~execute_single->proc~write_message proc~execute_single->proc~pop_nvtx_domain_range 
proc~execute_single->proc~push_nvtx_domain_range proc~execute_single->proc~dtfft_get_backend_string proc~execute_single_transpose_modes execute_single_transpose_modes proc~execute_single->proc~execute_single_transpose_modes proc~get_conf_compression_enabled->interface~get_conf_internal proc~get_conf_datatype_enabled->interface~get_conf_internal proc~get_conf_fused_enabled->interface~get_conf_internal proc~get_conf_mpi_enabled->interface~get_conf_internal proc~get_conf_nccl_enabled->interface~get_conf_internal proc~get_conf_nvshmem_enabled->interface~get_conf_internal proc~get_conf_pipelined_enabled->interface~get_conf_internal proc~get_env_base get_env_base interface~get_env->proc~get_env_base proc~get_env_int32 get_env_int32 interface~get_env->proc~get_env_int32 proc~get_env_int8 get_env_int8 interface~get_env->proc~get_env_int8 proc~get_env_logical get_env_logical interface~get_env->proc~get_env_logical proc~get_env_string get_env_string interface~get_env->proc~get_env_string proc~allocate_plans->proc~write_message proc~allocate_plans->mpi_abort proc~check_if_even->mpi_comm_size proc~check_if_even->mpi_allgather proc~create_subcomm_include_all->mpi_comm_size proc~create_subcomm create_subcomm proc~create_subcomm_include_all->proc~create_subcomm proc~create~18->proc~write_message proc~create~18->mpi_abort create_private create_private proc~create~18->create_private proc~get_reshape_type get_reshape_type proc~create~18->proc~get_reshape_type proc~get_transpose_type get_transpose_type proc~create~18->proc~get_transpose_type proc~is_valid_reshape_type is_valid_reshape_type proc~create~18->proc~is_valid_reshape_type proc~is_valid_transpose_type is_valid_transpose_type proc~create~18->proc~is_valid_transpose_type destroy destroy proc~destroy_plans->destroy proc~execute_autotune->fname proc~execute_autotune->mpi_abort execute execute proc~execute_autotune->execute interface~cudaeventcreate cudaEventCreate proc~execute_autotune->interface~cudaeventcreate 
interface~cudaeventdestroy cudaEventDestroy proc~execute_autotune->interface~cudaeventdestroy interface~cudaeventelapsedtime cudaEventElapsedTime proc~execute_autotune->interface~cudaeventelapsedtime interface~cudaeventrecord cudaEventRecord proc~execute_autotune->interface~cudaeventrecord interface~cudaeventsynchronize cudaEventSynchronize proc~execute_autotune->interface~cudaeventsynchronize interface~cudastreamsynchronize cudaStreamSynchronize proc~execute_autotune->interface~cudastreamsynchronize mpi_barrier mpi_barrier proc~execute_autotune->mpi_barrier mpi_wtime mpi_wtime proc~execute_autotune->mpi_wtime proc~alloc_and_set_aux alloc_and_set_aux proc~execute_autotune->proc~alloc_and_set_aux proc~alloc_mem alloc_mem proc~execute_autotune->proc~alloc_mem proc~cudageterrorstring cudaGetErrorString proc~execute_autotune->proc~cudageterrorstring proc~dtfft_get_error_string dtfft_get_error_string proc~execute_autotune->proc~dtfft_get_error_string proc~free_mem free_mem proc~execute_autotune->proc~free_mem proc~get_conf_measure_iters get_conf_measure_iters proc~execute_autotune->proc~get_conf_measure_iters proc~get_conf_measure_warmup_iters get_conf_measure_warmup_iters proc~execute_autotune->proc~get_conf_measure_warmup_iters proc~report_timings report_timings proc~execute_autotune->proc~report_timings proc~run_execute_single run_execute_single proc~execute_single_transpose_modes->proc~run_execute_single proc~get_local_size->mpi_comm_size proc~get_local_size->mpi_comm_rank proc~get_local_size->mpi_allgather interface~ncclgeterrorstring_c ncclGetErrorString_c proc~ncclgeterrorstring->interface~ncclgeterrorstring_c proc~string_c2f string_c2f proc~ncclgeterrorstring->proc~string_c2f proc~alloc_and_set_aux->mpi_abort proc~alloc_and_set_aux->proc~alloc_mem proc~alloc_and_set_aux->proc~dtfft_get_error_string mpi_allreduce mpi_allreduce proc~alloc_and_set_aux->mpi_allreduce proc~get_aux_bytes_generic get_aux_bytes_generic proc~alloc_and_set_aux->proc~get_aux_bytes_generic 
proc~is_nvshmem_generic is_nvshmem_generic proc~alloc_and_set_aux->proc~is_nvshmem_generic proc~alloc_mem->proc~get_conf_log_enabled proc~alloc_mem->proc~write_message proc~alloc_mem->interface~to_str proc~alloc_mem->proc~is_backend_nccl proc~alloc_mem->proc~is_backend_nvshmem proc~alloc_mem->fname proc~alloc_mem->mpi_abort proc~alloc_mem->proc~dtfft_get_backend_string proc~alloc_mem->proc~ncclgeterrorstring proc~alloc_mem->proc~cudageterrorstring interface~cudamalloc cudaMalloc proc~alloc_mem->interface~cudamalloc interface~cudamemgetinfo cudaMemGetInfo proc~alloc_mem->interface~cudamemgetinfo interface~ncclcommregister ncclCommRegister proc~alloc_mem->interface~ncclcommregister interface~ncclmemalloc ncclMemAlloc proc~alloc_mem->interface~ncclmemalloc interface~nvshmem_malloc nvshmem_malloc proc~alloc_mem->interface~nvshmem_malloc is_null_ptr is_null_ptr proc~alloc_mem->is_null_ptr proc~alloc_mem->mpi_allreduce proc~mem_alloc_host mem_alloc_host proc~alloc_mem->proc~mem_alloc_host temp temp proc~alloc_mem->temp mpi_comm_create mpi_comm_create proc~create_subcomm->mpi_comm_create mpi_comm_group mpi_comm_group proc~create_subcomm->mpi_comm_group mpi_group_free mpi_group_free proc~create_subcomm->mpi_group_free mpi_group_incl mpi_group_incl proc~create_subcomm->mpi_group_incl proc~cudageterrorstring->proc~string_c2f interface~cudageterrorstring_c cudaGetErrorString_c proc~cudageterrorstring->interface~cudageterrorstring_c proc~free_mem->proc~get_conf_log_enabled proc~free_mem->proc~write_message proc~free_mem->interface~to_str proc~free_mem->proc~is_backend_nccl proc~free_mem->proc~is_backend_nvshmem proc~free_mem->fname proc~free_mem->mpi_abort proc~free_mem->proc~ncclgeterrorstring interface~cudafree cudaFree proc~free_mem->interface~cudafree interface~mem_free_host mem_free_host proc~free_mem->interface~mem_free_host interface~ncclcommderegister ncclCommDeregister proc~free_mem->interface~ncclcommderegister interface~ncclmemfree ncclMemFree 
proc~free_mem->interface~ncclmemfree nvshmem_free nvshmem_free proc~free_mem->nvshmem_free proc~is_same_ptr is_same_ptr proc~free_mem->proc~is_same_ptr proc~get_conf_measure_iters->interface~get_conf_internal proc~get_conf_measure_warmup_iters->interface~get_conf_internal proc~get_env_base->mpi_bcast proc~destroy_string string%destroy_string proc~get_env_base->proc~destroy_string proc~get_env_int32->proc~write_message proc~get_env_int32->interface~get_env proc~get_env_int32->mpi_abort proc~get_env_int8->interface~get_env proc~get_env_logical->interface~get_env proc~get_env_string->proc~write_message proc~get_env_string->interface~get_env proc~get_env_string->proc~destroy_string proc~report_timings->mpi_comm_size proc~report_timings->proc~get_conf_log_enabled proc~report_timings->proc~write_message proc~report_timings->interface~to_str proc~report_timings->mpi_allreduce proc~run_execute_single->proc~get_conf_log_enabled proc~run_execute_single->proc~write_message proc~run_execute_single->proc~pop_nvtx_domain_range proc~run_execute_single->proc~push_nvtx_domain_range proc~run_execute_single->proc~allocate_plans proc~run_execute_single->proc~create~18 proc~run_execute_single->proc~destroy_plans proc~run_execute_single->proc~execute_autotune interface~is_null_ptr is_null_ptr proc~string_c2f->interface~is_null_ptr interface~is_null_ptr->interface~is_null_ptr proc~is_null_funptr is_null_funptr interface~is_null_ptr->proc~is_null_funptr proc~get_aux_bytes~5 abstract_reshape_handle%get_aux_bytes proc~get_aux_bytes_generic->proc~get_aux_bytes~5 proc~is_nvshmem_generic->proc~is_backend_nvshmem get_backend get_backend proc~is_nvshmem_generic->get_backend interface~aligned_alloc aligned_alloc proc~mem_alloc_host->interface~aligned_alloc

Called by

proc~~autotune_grid_decomposition~~CalledByGraph proc~autotune_grid_decomposition autotune_grid_decomposition proc~create~15 transpose_plan%create proc~create~15->proc~autotune_grid_decomposition