run_autotune_backend Subroutine

private subroutine run_autotune_backend(comms, cart_comm, pencils, base_storage, stream, is_z_slab, backend, best_time, best_backend)

Uses

  • proc~~run_autotune_backend~~UsesGraph proc~run_autotune_backend run_autotune_backend iso_c_binding iso_c_binding proc~run_autotune_backend->iso_c_binding iso_fortran_env iso_fortran_env proc~run_autotune_backend->iso_fortran_env

Runs autotune for all backends

Arguments

Type Intent Optional Attributes Name
type(MPI_Comm), intent(in) :: comms(:)

1D communicators

type(MPI_Comm), intent(in) :: cart_comm

3D Cartesian communicator

type(pencil), intent(in) :: pencils(:)

Source pencil metadata

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store a single element

type(dtfft_stream_t), intent(in) :: stream

Stream to use

logical, intent(in) :: is_z_slab

Is Z-slab optimization enabled

type(dtfft_backend_t), intent(in), optional :: backend

GPU backend to test. Should be passed only when effort is DTFFT_ESTIMATE or DTFFT_MEASURE.

real(kind=real32), intent(out), optional :: best_time

Elapsed time for best backend

type(dtfft_backend_t), intent(out), optional :: best_backend

Best backend selected


Calls

proc~~run_autotune_backend~~CallsGraph proc~run_autotune_backend run_autotune_backend interface~cudaeventcreate cudaEventCreate proc~run_autotune_backend->interface~cudaeventcreate interface~cudaeventdestroy cudaEventDestroy proc~run_autotune_backend->interface~cudaeventdestroy interface~cudaeventelapsedtime cudaEventElapsedTime proc~run_autotune_backend->interface~cudaeventelapsedtime interface~cudaeventrecord cudaEventRecord proc~run_autotune_backend->interface~cudaeventrecord interface~cudaeventsynchronize cudaEventSynchronize proc~run_autotune_backend->interface~cudaeventsynchronize interface~cudastreamsynchronize cudaStreamSynchronize proc~run_autotune_backend->interface~cudastreamsynchronize interface~to_str to_str proc~run_autotune_backend->interface~to_str mpi_abort mpi_abort proc~run_autotune_backend->mpi_abort mpi_allreduce mpi_allreduce proc~run_autotune_backend->mpi_allreduce mpi_barrier mpi_barrier proc~run_autotune_backend->mpi_barrier mpi_comm_size mpi_comm_size proc~run_autotune_backend->mpi_comm_size proc~alloc_and_set_aux alloc_and_set_aux proc~run_autotune_backend->proc~alloc_and_set_aux proc~alloc_mem alloc_mem proc~run_autotune_backend->proc~alloc_mem proc~create_helper backend_helper%create_helper proc~run_autotune_backend->proc~create_helper proc~cudageterrorstring cudaGetErrorString proc~run_autotune_backend->proc~cudageterrorstring proc~destroy~9 transpose_handle_cuda%destroy proc~run_autotune_backend->proc~destroy~9 proc~dtfft_get_backend_string dtfft_get_backend_string proc~run_autotune_backend->proc~dtfft_get_backend_string proc~dtfft_get_error_string dtfft_get_error_string proc~run_autotune_backend->proc~dtfft_get_error_string proc~execute~8 transpose_handle_cuda%execute proc~run_autotune_backend->proc~execute~8 proc~free_mem free_mem proc~run_autotune_backend->proc~free_mem proc~get_conf_log_enabled get_conf_log_enabled proc~run_autotune_backend->proc~get_conf_log_enabled proc~get_conf_measure_iters get_conf_measure_iters 
proc~run_autotune_backend->proc~get_conf_measure_iters proc~get_conf_measure_warmup_iters get_conf_measure_warmup_iters proc~run_autotune_backend->proc~get_conf_measure_warmup_iters proc~get_conf_mpi_enabled get_conf_mpi_enabled proc~run_autotune_backend->proc~get_conf_mpi_enabled proc~get_conf_nvshmem_enabled get_conf_nvshmem_enabled proc~run_autotune_backend->proc~get_conf_nvshmem_enabled proc~get_conf_pipelined_enabled get_conf_pipelined_enabled proc~run_autotune_backend->proc~get_conf_pipelined_enabled proc~get_local_sizes get_local_sizes proc~run_autotune_backend->proc~get_local_sizes proc~is_backend_mpi is_backend_mpi proc~run_autotune_backend->proc~is_backend_mpi proc~is_backend_nccl is_backend_nccl proc~run_autotune_backend->proc~is_backend_nccl proc~is_backend_nvshmem is_backend_nvshmem proc~run_autotune_backend->proc~is_backend_nvshmem proc~is_backend_pipelined is_backend_pipelined proc~run_autotune_backend->proc~is_backend_pipelined proc~pop_nvtx_domain_range pop_nvtx_domain_range proc~run_autotune_backend->proc~pop_nvtx_domain_range proc~push_nvtx_domain_range push_nvtx_domain_range proc~run_autotune_backend->proc~push_nvtx_domain_range proc~write_message write_message proc~run_autotune_backend->proc~write_message proc~double_to_string double_to_string interface~to_str->proc~double_to_string proc~float_to_string float_to_string interface~to_str->proc~float_to_string proc~int32_to_string int32_to_string interface~to_str->proc~int32_to_string proc~int64_to_string int64_to_string interface~to_str->proc~int64_to_string proc~int8_to_string int8_to_string interface~to_str->proc~int8_to_string proc~alloc_and_set_aux->mpi_abort proc~alloc_and_set_aux->mpi_allreduce proc~alloc_and_set_aux->proc~alloc_mem proc~alloc_and_set_aux->proc~dtfft_get_error_string proc~get_aux_size~3 transpose_handle_cuda%get_aux_size proc~alloc_and_set_aux->proc~get_aux_size~3 proc~alloc_mem->interface~to_str proc~alloc_mem->mpi_abort proc~alloc_mem->mpi_allreduce 
proc~alloc_mem->proc~cudageterrorstring proc~alloc_mem->proc~dtfft_get_backend_string proc~alloc_mem->proc~get_conf_log_enabled proc~alloc_mem->proc~is_backend_nccl proc~alloc_mem->proc~is_backend_nvshmem proc~alloc_mem->proc~write_message interface~cudamalloc cudaMalloc proc~alloc_mem->interface~cudamalloc interface~cudamemgetinfo cudaMemGetInfo proc~alloc_mem->interface~cudamemgetinfo interface~ncclcommregister ncclCommRegister proc~alloc_mem->interface~ncclcommregister interface~ncclmemalloc ncclMemAlloc proc~alloc_mem->interface~ncclmemalloc interface~nvshmem_malloc nvshmem_malloc proc~alloc_mem->interface~nvshmem_malloc is_null_ptr is_null_ptr proc~alloc_mem->is_null_ptr proc~ncclgeterrorstring ncclGetErrorString proc~alloc_mem->proc~ncclgeterrorstring temp temp proc~alloc_mem->temp proc~create_helper->interface~to_str proc~create_helper->mpi_abort proc~create_helper->mpi_comm_size interface~get_env get_env proc~create_helper->interface~get_env interface~ncclcomminitrank ncclCommInitRank proc~create_helper->interface~ncclcomminitrank interface~ncclgetuniqueid ncclGetUniqueId proc~create_helper->interface~ncclgetuniqueid mpi_allgather mpi_allgather proc~create_helper->mpi_allgather mpi_bcast mpi_bcast proc~create_helper->mpi_bcast mpi_comm_rank mpi_comm_rank proc~create_helper->mpi_comm_rank proc~destroy_helper backend_helper%destroy_helper proc~create_helper->proc~destroy_helper proc~create_helper->proc~ncclgeterrorstring interface~cudageterrorstring_c cudaGetErrorString_c proc~cudageterrorstring->interface~cudageterrorstring_c proc~string_c2f string_c2f proc~cudageterrorstring->proc~string_c2f proc~destroy~4 nvrtc_kernel%destroy proc~destroy~9->proc~destroy~4 proc~execute~3 nvrtc_kernel%execute proc~execute~8->proc~execute~3 proc~free_mem->interface~to_str proc~free_mem->mpi_abort proc~free_mem->proc~get_conf_log_enabled proc~free_mem->proc~is_backend_nccl proc~free_mem->proc~is_backend_nvshmem proc~free_mem->proc~write_message interface~cudafree cudaFree 
proc~free_mem->interface~cudafree interface~ncclcommderegister ncclCommDeregister proc~free_mem->interface~ncclcommderegister interface~ncclmemfree ncclMemFree proc~free_mem->interface~ncclmemfree nvshmem_free nvshmem_free proc~free_mem->nvshmem_free proc~is_same_ptr is_same_ptr proc~free_mem->proc~is_same_ptr proc~free_mem->proc~ncclgeterrorstring interface~get_conf_internal get_conf_internal proc~get_conf_log_enabled->interface~get_conf_internal proc~get_conf_measure_iters->interface~get_conf_internal proc~get_conf_measure_warmup_iters->interface~get_conf_internal proc~get_conf_mpi_enabled->interface~get_conf_internal proc~get_conf_nvshmem_enabled->interface~get_conf_internal proc~get_conf_pipelined_enabled->interface~get_conf_internal interface~nvtxdomainrangepop_c nvtxDomainRangePop_c proc~pop_nvtx_domain_range->interface~nvtxdomainrangepop_c interface~nvtxdomainrangepushex_c nvtxDomainRangePushEx_c proc~push_nvtx_domain_range->interface~nvtxdomainrangepushex_c proc~astring_f2c astring_f2c proc~push_nvtx_domain_range->proc~astring_f2c proc~create_nvtx_domain create_nvtx_domain proc~push_nvtx_domain_range->proc~create_nvtx_domain proc~write_message->mpi_comm_rank mpi_finalized mpi_finalized proc~write_message->mpi_finalized proc~get_conf_internal_int32 get_conf_internal_int32 interface~get_conf_internal->proc~get_conf_internal_int32 proc~get_conf_internal_logical get_conf_internal_logical interface~get_conf_internal->proc~get_conf_internal_logical proc~get_env_base get_env_base interface~get_env->proc~get_env_base proc~get_env_int32 get_env_int32 interface~get_env->proc~get_env_int32 proc~get_env_int8 get_env_int8 interface~get_env->proc~get_env_int8 proc~get_env_logical get_env_logical interface~get_env->proc~get_env_logical proc~get_env_string get_env_string interface~get_env->proc~get_env_string proc~string_f2c string_f2c proc~astring_f2c->proc~string_f2c proc~create_nvtx_domain->proc~astring_f2c interface~nvtxdomaincreate_c nvtxDomainCreate_c 
proc~create_nvtx_domain->interface~nvtxdomaincreate_c proc~destroy_helper->interface~to_str proc~destroy_helper->mpi_abort proc~destroy_helper->proc~write_message proc~destroy_helper->proc~ncclgeterrorstring interface~ncclcommdestroy ncclCommDestroy proc~destroy_helper->interface~ncclcommdestroy proc~destroy~4->interface~to_str proc~destroy~4->mpi_abort proc~destroy~4->proc~cudageterrorstring proc~destroy~4->interface~cudafree proc~remove nvrtc_cache%remove proc~destroy~4->proc~remove proc~execute~3->interface~cudastreamsynchronize proc~execute~3->interface~to_str proc~execute~3->mpi_abort proc~execute~3->proc~cudageterrorstring interface~cudamemcpyasync cudaMemcpyAsync proc~execute~3->interface~cudamemcpyasync proc~culaunchkernel cuLaunchKernel proc~execute~3->proc~culaunchkernel proc~get_contiguous_execution_blocks get_contiguous_execution_blocks proc~execute~3->proc~get_contiguous_execution_blocks proc~get_aux_size~4 abstract_backend%get_aux_size proc~get_aux_size~3->proc~get_aux_size~4 proc~ncclgeterrorstring->proc~string_c2f interface~ncclgeterrorstring_c ncclGetErrorString_c proc~ncclgeterrorstring->interface~ncclgeterrorstring_c proc~destroy_string string%destroy_string proc~get_env_base->proc~destroy_string proc~get_env_int32->proc~write_message proc~get_env_int32->interface~get_env proc~get_env_int8->interface~get_env proc~get_env_logical->interface~get_env proc~get_env_string->proc~write_message proc~get_env_string->interface~get_env proc~remove->is_null_ptr proc~remove->proc~is_same_ptr

Called by

proc~~run_autotune_backend~~CalledByGraph proc~run_autotune_backend run_autotune_backend proc~autotune_grid~2 autotune_grid proc~autotune_grid~2->proc~run_autotune_backend proc~create_cuda transpose_plan_cuda%create_cuda proc~create_cuda->proc~run_autotune_backend proc~autotune_grid_decomposition~2 autotune_grid_decomposition proc~create_cuda->proc~autotune_grid_decomposition~2 proc~autotune_grid_decomposition~2->proc~autotune_grid~2