execute_nccl Subroutine

private subroutine execute_nccl(self, in, out, stream, aux)

Uses

  • Uses graph for backend_nccl%execute_nccl:
      - backend_nccl%execute_nccl -> iso_c_binding
      - backend_nccl%execute_nccl -> iso_fortran_env

Executes NCCL backend

Type Bound

backend_nccl

Arguments

Type | Intent | Optional | Attributes | Name
class(backend_nccl), intent(inout) :: self

NCCL backend

real(kind=real32), intent(inout), target :: in(:)

Send pointer

real(kind=real32), intent(inout), target :: out(:)

Recv pointer

type(dtfft_stream_t), intent(in) :: stream

Main execution CUDA stream

real(kind=real32), intent(inout), target :: aux(:)

Auxiliary pointer


Calls

Call graph for backend_nccl%execute_nccl:

  • backend_nccl%execute_nccl calls:
      - int_to_str (generic interface)
      - ncclGroupStart
      - ncclGroupEnd
      - ncclSend
      - ncclRecv
      - ncclGetErrorString
      - nvrtc_kernel%execute
      - mpi_abort
  • int_to_str (generic interface) resolves to:
      - int_to_str_int8
      - int_to_str_int32
      - int_to_str_int64
  • nvrtc_kernel%execute calls:
      - int_to_str (generic interface)
      - mpi_abort
      - mpi_comm_rank
      - cudaMemcpyAsync
      - cudaGetErrorString
      - cuLaunchKernel
      - get_contiguous_execution_blocks
  • ncclGetErrorString calls:
      - ncclGetErrorString_c
      - string_c2f
  • cudaGetErrorString calls:
      - cudaGetErrorString_c
      - string_c2f
  • cuLaunchKernel calls:
      - run_cuda_kernel