execute Subroutine

private subroutine execute(self, in, out, stream, aux)

Executes the transpose – exchange – unpack sequence.

Type Bound

transpose_handle_cuda

Arguments

Type | Intent | Optional | Attributes | Name
class(transpose_handle_cuda), intent(inout) :: self

CUDA Transpose Handle

real(kind=real32), intent(inout) :: in(:)

Send pointer

real(kind=real32), intent(inout) :: out(:)

Recv pointer

type(dtfft_stream_t), intent(in) :: stream

Main execution CUDA stream

real(kind=real32), intent(inout) :: aux(:)

Aux pointer


Calls

Call graph (caller -> callee):

- transpose_handle_cuda%execute -> nvrtc_kernel%execute
- nvrtc_kernel%execute -> cudaMemcpyAsync
- nvrtc_kernel%execute -> int_to_str
- nvrtc_kernel%execute -> mpi_abort
- nvrtc_kernel%execute -> mpi_comm_rank
- nvrtc_kernel%execute -> cudaGetErrorString
- nvrtc_kernel%execute -> cuLaunchKernel
- nvrtc_kernel%execute -> get_contiguous_execution_blocks
- int_to_str -> int_to_str_int32
- int_to_str -> int_to_str_int64
- int_to_str -> int_to_str_int8
- cudaGetErrorString -> cudaGetErrorString_c
- cudaGetErrorString -> string_c2f
- cuLaunchKernel -> run_cuda_kernel

Called by

Called-by graph (caller -> callee):

- transpose_plan_cuda%execute_cuda -> transpose_handle_cuda%execute
- run_autotune_backend -> transpose_handle_cuda%execute
- autotune_grid -> run_autotune_backend
- transpose_plan_cuda%create_cuda -> run_autotune_backend
- transpose_plan_cuda%create_cuda -> autotune_grid_decomposition
- autotune_grid_decomposition -> autotune_grid