create Subroutine

private subroutine create(self, helper, send, recv, effort, base_storage, backend, force_effort)

Creates CUDA Transpose Handle

Type Bound

transpose_handle_cuda

Arguments

Type Intent Optional Attributes Name
class(transpose_handle_cuda), intent(inout) :: self

CUDA Transpose Handle

type(backend_helper), intent(in) :: helper

Backend helper

type(pencil), intent(in) :: send

Send pencil

type(pencil), intent(in) :: recv

Recv pencil

type(dtfft_effort_t), intent(in) :: effort

Effort level for generating transpose kernels

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store a single element

type(dtfft_backend_t), intent(in) :: backend

Backend type

logical, intent(in), optional :: force_effort

Whether the effort level should be forced


Calls

proc~~create~9~~CallsGraph proc~create~9 transpose_handle_cuda%create mpi_comm_rank mpi_comm_rank proc~create~9->mpi_comm_rank mpi_comm_size mpi_comm_size proc~create~9->mpi_comm_size mpi_irecv mpi_irecv proc~create~9->mpi_irecv mpi_isend mpi_isend proc~create~9->mpi_isend mpi_wait mpi_wait proc~create~9->mpi_wait proc~create~4 nvrtc_kernel%create proc~create~9->proc~create~4 proc~destroy_data_handle data_handle%destroy_data_handle proc~create~9->proc~destroy_data_handle proc~get_transpose_type get_transpose_type proc~create~9->proc~get_transpose_type proc~is_backend_cufftmp is_backend_cufftmp proc~create~9->proc~is_backend_cufftmp proc~is_backend_mpi is_backend_mpi proc~create~9->proc~is_backend_mpi proc~is_backend_nccl is_backend_nccl proc~create~9->proc~is_backend_nccl proc~is_backend_pipelined is_backend_pipelined proc~create~9->proc~is_backend_pipelined proc~set_unpack_kernel abstract_backend%set_unpack_kernel proc~create~9->proc~set_unpack_kernel interface~cudagetdevice cudaGetDevice proc~create~4->interface~cudagetdevice interface~get_device_props get_device_props proc~create~4->interface~get_device_props interface~to_str to_str proc~create~4->interface~to_str mpi_abort mpi_abort proc~create~4->mpi_abort proc~create_device_pointer create_device_pointer proc~create~4->proc~create_device_pointer proc~cudageterrorstring cudaGetErrorString proc~create~4->proc~cudageterrorstring proc~destroy~4 nvrtc_kernel%destroy proc~create~4->proc~destroy~4 proc~get_kernel get_kernel proc~create~4->proc~get_kernel proc~get_kernel_args get_kernel_args proc~create~4->proc~get_kernel_args proc~double_to_string double_to_string interface~to_str->proc~double_to_string proc~float_to_string float_to_string interface~to_str->proc~float_to_string proc~int32_to_string int32_to_string interface~to_str->proc~int32_to_string proc~int64_to_string int64_to_string interface~to_str->proc~int64_to_string proc~int8_to_string int8_to_string interface~to_str->proc~int8_to_string 
proc~create_device_pointer->interface~to_str proc~create_device_pointer->mpi_abort proc~create_device_pointer->proc~cudageterrorstring interface~cudamalloc cudaMalloc proc~create_device_pointer->interface~cudamalloc interface~cudamemcpy cudaMemcpy proc~create_device_pointer->interface~cudamemcpy interface~cudageterrorstring_c cudaGetErrorString_c proc~cudageterrorstring->interface~cudageterrorstring_c proc~string_c2f string_c2f proc~cudageterrorstring->proc~string_c2f proc~destroy~4->interface~to_str proc~destroy~4->mpi_abort proc~destroy~4->proc~cudageterrorstring interface~cudafree cudaFree proc~destroy~4->interface~cudafree proc~remove nvrtc_cache%remove proc~destroy~4->proc~remove proc~get_kernel->interface~to_str proc~get_kernel->mpi_abort proc~get_kernel->proc~cudageterrorstring proc~get_kernel->proc~get_kernel_args interface~cudaeventcreate cudaEventCreate proc~get_kernel->interface~cudaeventcreate interface~cudaeventdestroy cudaEventDestroy proc~get_kernel->interface~cudaeventdestroy interface~cudaeventelapsedtime cudaEventElapsedTime proc~get_kernel->interface~cudaeventelapsedtime interface~cudaeventrecord cudaEventRecord proc~get_kernel->interface~cudaeventrecord interface~cudaeventsynchronize cudaEventSynchronize proc~get_kernel->interface~cudaeventsynchronize proc~get_kernel->interface~cudafree proc~get_kernel->interface~cudamalloc interface~cudastreamsynchronize cudaStreamSynchronize proc~get_kernel->interface~cudastreamsynchronize proc~compile_and_cache compile_and_cache proc~get_kernel->proc~compile_and_cache proc~culaunchkernel cuLaunchKernel proc~get_kernel->proc~culaunchkernel proc~destroy_code kernel_codegen%destroy_code proc~get_kernel->proc~destroy_code proc~evaluate_analytical_performance evaluate_analytical_performance proc~get_kernel->proc~evaluate_analytical_performance proc~generate_candidates generate_candidates proc~get_kernel->proc~generate_candidates proc~get_conf_configs_to_test get_conf_configs_to_test 
proc~get_kernel->proc~get_conf_configs_to_test proc~get_conf_forced_kernel_optimization get_conf_forced_kernel_optimization proc~get_kernel->proc~get_conf_forced_kernel_optimization proc~get_conf_kernel_optimization_enabled get_conf_kernel_optimization_enabled proc~get_kernel->proc~get_conf_kernel_optimization_enabled proc~get_conf_log_enabled get_conf_log_enabled proc~get_kernel->proc~get_conf_log_enabled proc~get_conf_measure_iters get_conf_measure_iters proc~get_kernel->proc~get_conf_measure_iters proc~get_conf_measure_warmup_iters get_conf_measure_warmup_iters proc~get_kernel->proc~get_conf_measure_warmup_iters proc~get_conf_stream get_conf_stream proc~get_kernel->proc~get_conf_stream proc~get_contiguous_execution_blocks get_contiguous_execution_blocks proc~get_kernel->proc~get_contiguous_execution_blocks proc~get_transpose_kernel get_transpose_kernel proc~get_kernel->proc~get_transpose_kernel proc~get_unpack_kernel_code get_unpack_kernel_code proc~get_kernel->proc~get_unpack_kernel_code proc~get_unpack_pipelined_kernel_code get_unpack_pipelined_kernel_code proc~get_kernel->proc~get_unpack_pipelined_kernel_code proc~is_unpack_kernel is_unpack_kernel proc~get_kernel->proc~is_unpack_kernel proc~pop_nvtx_domain_range pop_nvtx_domain_range proc~get_kernel->proc~pop_nvtx_domain_range proc~push_nvtx_domain_range push_nvtx_domain_range proc~get_kernel->proc~push_nvtx_domain_range proc~get_kernel->proc~remove proc~sort_candidates_by_score sort_candidates_by_score proc~get_kernel->proc~sort_candidates_by_score proc~write_message write_message proc~get_kernel->proc~write_message proc~get_kernel_args->mpi_comm_rank proc~get_kernel_args->mpi_comm_size proc~compile_and_cache->interface~to_str proc~compile_and_cache->mpi_abort proc~compile_and_cache->proc~cudageterrorstring proc~compile_and_cache->proc~get_conf_log_enabled proc~compile_and_cache->proc~pop_nvtx_domain_range proc~compile_and_cache->proc~push_nvtx_domain_range proc~compile_and_cache->proc~string_c2f 
proc~compile_and_cache->proc~write_message is_null_ptr is_null_ptr proc~compile_and_cache->is_null_ptr proc~add nvrtc_cache%add proc~compile_and_cache->proc~add proc~astring_f2c astring_f2c proc~compile_and_cache->proc~astring_f2c proc~destroy_strings destroy_strings proc~compile_and_cache->proc~destroy_strings proc~get nvrtc_cache%get proc~compile_and_cache->proc~get proc~nvrtcgeterrorstring nvrtcGetErrorString proc~compile_and_cache->proc~nvrtcgeterrorstring proc~to_cstr kernel_codegen%to_cstr proc~compile_and_cache->proc~to_cstr proc~count_bank_conflicts count_bank_conflicts proc~evaluate_analytical_performance->proc~count_bank_conflicts proc~estimate_bank_conflict_ratio estimate_bank_conflict_ratio proc~evaluate_analytical_performance->proc~estimate_bank_conflict_ratio proc~estimate_coalescing estimate_coalescing proc~evaluate_analytical_performance->proc~estimate_coalescing proc~estimate_occupancy estimate_occupancy proc~evaluate_analytical_performance->proc~estimate_occupancy proc~estimate_memory_pressure estimate_memory_pressure proc~generate_candidates->proc~estimate_memory_pressure proc~estimate_optimal_padding estimate_optimal_padding proc~generate_candidates->proc~estimate_optimal_padding proc~find_valid_combination find_valid_combination proc~generate_candidates->proc~find_valid_combination interface~get_conf_internal get_conf_internal proc~get_conf_configs_to_test->interface~get_conf_internal proc~get_conf_forced_kernel_optimization->interface~get_conf_internal proc~get_conf_kernel_optimization_enabled->interface~get_conf_internal proc~get_conf_log_enabled->interface~get_conf_internal proc~get_conf_measure_iters->interface~get_conf_internal proc~get_conf_measure_warmup_iters->interface~get_conf_internal proc~get_conf_stream->interface~to_str proc~get_conf_stream->mpi_abort proc~get_conf_stream->proc~cudageterrorstring interface~cudastreamcreate cudaStreamCreate proc~get_conf_stream->interface~cudastreamcreate 
proc~get_transpose_kernel->proc~compile_and_cache proc~get_transpose_kernel->proc~destroy_code proc~get_transpose_kernel_code get_transpose_kernel_code proc~get_transpose_kernel->proc~get_transpose_kernel_code proc~add_line kernel_codegen%add_line proc~get_unpack_kernel_code->proc~add_line proc~get_code_init get_code_init proc~get_unpack_kernel_code->proc~get_code_init proc~get_neighbor_function_code get_neighbor_function_code proc~get_unpack_kernel_code->proc~get_neighbor_function_code proc~get_unpack_pipelined_kernel_code->proc~add_line proc~get_unpack_pipelined_kernel_code->proc~get_code_init interface~nvtxdomainrangepop_c nvtxDomainRangePop_c proc~pop_nvtx_domain_range->interface~nvtxdomainrangepop_c interface~nvtxdomainrangepushex_c nvtxDomainRangePushEx_c proc~push_nvtx_domain_range->interface~nvtxdomainrangepushex_c proc~push_nvtx_domain_range->proc~astring_f2c proc~create_nvtx_domain create_nvtx_domain proc~push_nvtx_domain_range->proc~create_nvtx_domain proc~remove->is_null_ptr proc~is_same_ptr is_same_ptr proc~remove->proc~is_same_ptr proc~write_message->mpi_comm_rank mpi_finalized mpi_finalized proc~write_message->mpi_finalized proc~get_conf_internal_int32 get_conf_internal_int32 interface~get_conf_internal->proc~get_conf_internal_int32 proc~get_conf_internal_logical get_conf_internal_logical interface~get_conf_internal->proc~get_conf_internal_logical proc~create~12 nvrtc_cache%create proc~add->proc~create~12 proc~get_true_transpose_type get_true_transpose_type proc~add->proc~get_true_transpose_type proc~string_f2c string_f2c proc~astring_f2c->proc~string_f2c proc~create_nvtx_domain->proc~astring_f2c interface~nvtxdomaincreate_c nvtxDomainCreate_c proc~create_nvtx_domain->interface~nvtxdomaincreate_c proc~destroy_string string%destroy_string proc~destroy_strings->proc~destroy_string proc~estimate_bank_conflict_ratio->proc~count_bank_conflicts proc~estimate_optimal_padding->proc~count_bank_conflicts proc~get->proc~is_unpack_kernel 
proc~get->proc~get_true_transpose_type proc~get_code_init->proc~add_line proc~get_neighbor_function_code->proc~add_line proc~get_transpose_kernel_code->interface~to_str proc~get_transpose_kernel_code->proc~add_line proc~get_transpose_kernel_code->proc~get_code_init proc~get_transpose_kernel_code->proc~get_neighbor_function_code proc~nvrtcgeterrorstring->proc~string_c2f proc~to_cstr->proc~astring_f2c