execute_nccl Subroutine

private subroutine execute_nccl(self, in, out, stream, aux, error_code)

Uses

  • iso_c_binding
  • iso_fortran_env

Executes NCCL backend

Type Bound

backend_nccl

Arguments

Type | Intent | Optional | Attributes | Name
class(backend_nccl), intent(inout) :: self

NCCL backend

real(kind=real32), intent(inout), target, contiguous :: in(:)

Send pointer

real(kind=real32), intent(inout), target, contiguous :: out(:)

Recv pointer

type(dtfft_stream_t), intent(in) :: stream

Main execution CUDA stream

real(kind=real32), intent(inout), target, contiguous :: aux(:)

Aux pointer

integer(kind=int32), intent(out) :: error_code

Error code


Calls

Call graph of backend_nccl%execute_nccl (caller -> callees):

  • backend_nccl%execute_nccl -> crecvs, csends, fname, ncclGroupEnd, ncclGroupStart, ncclRecv, ncclSend, mpi_abort, mpi_alltoall, abstract_backend%execute_self_copy, abstract_kernel%execute, ncclGetErrorString
  • abstract_backend%execute_self_copy -> fname, mpi_abort, cudaEventRecord, cudaMemcpyAsync, cudaStreamWaitEvent, cudaGetErrorString, pop_nvtx_domain_range, push_nvtx_domain_range
  • abstract_kernel%execute -> mpi_abort, execute_private, post_sync, pre_sync, bytes_to_floats, abstract_compressor%compress, abstract_compressor%decompress, pop_nvtx_domain_range, push_nvtx_domain_range, write_message, sync
  • ncclGetErrorString -> ncclGetErrorString_c, string_c2f
  • abstract_compressor%compress -> mpi_abort, pop_nvtx_domain_range, push_nvtx_domain_range, write_message, compress_private, to_str, is_same_ptr, ptr_offset
  • cudaGetErrorString -> string_c2f, cudaGetErrorString_c
  • abstract_compressor%decompress -> mpi_abort, pop_nvtx_domain_range, push_nvtx_domain_range, write_message, decompress_private, is_same_ptr, ptr_offset
  • pop_nvtx_domain_range -> nvtxDomainRangePop_c
  • push_nvtx_domain_range -> nvtxDomainRangePushEx_c, astring_f2c, create_nvtx_domain
  • string_c2f -> is_null_ptr
  • write_message -> mpi_comm_rank, mpi_finalized
  • is_null_ptr (interface) -> is_null_ptr, is_null_funptr
  • to_str (interface) -> double_to_string, float_to_string, int32_to_string, int64_to_string, int8_to_string
  • astring_f2c -> string_f2c
  • create_nvtx_domain -> astring_f2c, nvtxDomainCreate_c