execute Subroutine

private subroutine execute(self, in, out, stream, source)

Uses

  • nvrtc_kernel%execute uses the modules iso_c_binding and iso_fortran_env.

Executes the kernel on the given CUDA stream.

Type Bound

nvrtc_kernel

Arguments

Type | Intent | Optional | Attributes | Name
class(nvrtc_kernel), intent(inout) :: self

nvRTC Compiled kernel class

real(kind=real32), intent(in), target :: in(:)

Source pointer

real(kind=real32), intent(in), target :: out(:)

Target pointer

type(dtfft_stream_t), intent(in) :: stream

CUDA Stream

integer(kind=int32), intent(in), optional :: source

Source rank for pipelined unpacking


Calls

Call graph for nvrtc_kernel%execute:

  • nvrtc_kernel%execute calls: cudaMemcpyAsync, cudaStreamSynchronize, to_str, mpi_abort, cudaGetErrorString, cuLaunchKernel, get_contiguous_execution_blocks
  • to_str (generic interface) resolves to: double_to_string, float_to_string, int32_to_string, int64_to_string, int8_to_string
  • cudaGetErrorString calls: cudaGetErrorString_c, string_c2f

Called by

Called-by graph for nvrtc_kernel%execute:

  • Direct callers of nvrtc_kernel%execute: backend_mpi%execute_mpi, backend_nccl%execute_nccl, transpose_handle_cuda%execute, abstract_backend%execute
  • transpose_handle_cuda%execute is called by: transpose_plan_cuda%execute_cuda, run_autotune_backend
  • run_autotune_backend is called by: autotune_grid, transpose_plan_cuda%create_cuda
  • autotune_grid is called by: autotune_grid_decomposition
  • autotune_grid_decomposition is called by: transpose_plan_cuda%create_cuda