execute Subroutine

private subroutine execute(self, in, out, stream, neighbor, aux, csize, csizes, skip_compression, skip_rank, sync)

Uses

  • iso_fortran_env (used by abstract_kernel%execute)

Executes kernel

Type Bound

abstract_kernel

Arguments

Type | Intent | Optional | Attributes | Name
class(abstract_kernel), intent(inout) :: self

Abstract kernel

type(c_ptr), intent(in) :: in

Source buffer, can be device or host pointer

type(c_ptr), intent(in) :: out

Target buffer, can be device or host pointer

type(dtfft_stream_t), intent(in) :: stream

Stream to execute on, used only for device pointers

integer(kind=int32), intent(in), optional :: neighbor

Source rank for pipelined unpacking

type(c_ptr), intent(in), optional :: aux

Auxiliary buffer, can be device or host pointer

integer(kind=int32), intent(inout), optional :: csize

Compressed buffer size

integer(kind=int32), intent(inout), optional :: csizes(:)

Multiple compression sizes. This should only be used with CUDA backends

logical, intent(in), optional :: skip_compression

Skip compression/decompression stage. Should be used when packing/unpacking from itself.

integer(kind=int32), intent(in), optional :: skip_rank

Skip compression/decompression for a specific rank when `neighbor` is not specified.

logical, intent(in), optional :: sync

Synchronize the stream after packing/compression. Should be used only with fused backends.


Calls

Call graph of abstract_kernel%execute (caller -> callees):

  • abstract_kernel%execute -> execute_private, mpi_abort, post_sync, pre_sync, bytes_to_floats, abstract_compressor%compress, abstract_compressor%decompress, pop_nvtx_domain_range, push_nvtx_domain_range, write_message, sync
  • abstract_compressor%compress -> mpi_abort, pop_nvtx_domain_range, push_nvtx_domain_range, write_message, compress_private, to_str, is_same_ptr, ptr_offset
  • abstract_compressor%decompress -> mpi_abort, pop_nvtx_domain_range, push_nvtx_domain_range, write_message, decompress_private, is_same_ptr, ptr_offset
  • pop_nvtx_domain_range -> nvtxDomainRangePop_c
  • push_nvtx_domain_range -> nvtxDomainRangePushEx_c, astring_f2c, create_nvtx_domain
  • write_message -> mpi_comm_rank, mpi_finalized
  • to_str -> double_to_string, float_to_string, int32_to_string, int64_to_string, int8_to_string
  • astring_f2c -> string_f2c
  • create_nvtx_domain -> astring_f2c, nvtxDomainCreate_c

Called by

Called-by graph of abstract_kernel%execute (caller -> callee):

  • reshape_handle_generic%execute_end -> abstract_kernel%execute
  • backend_mpi%execute_fused -> abstract_kernel%execute
  • backend_mpi%execute_mpi -> abstract_kernel%execute, backend_mpi%execute_fused
  • backend_nccl%execute_nccl -> abstract_kernel%execute
  • execute_test -> abstract_kernel%execute
  • abstract_backend%execute -> abstract_kernel%execute
  • test_pack_unpack -> abstract_kernel%execute
  • test_transpose -> abstract_kernel%execute
  • reshape_handle_generic%execute -> abstract_backend%execute
  • program test_compression -> test_pack_unpack, test_transpose
  • program test_device_kernels -> execute_test