get_contiguous_execution_blocks Subroutine

private subroutine get_contiguous_execution_blocks(size, num_blocks, block_sizes)

Arguments

Type Intent Optional Attributes Name
integer(kind=int32), intent(in) :: size

Total number of iterations required.

type(dim3), intent(out) :: num_blocks

Grid of blocks (number of thread blocks to launch).

type(dim3), intent(out) :: block_sizes

Thread block dimensions.


Called by

Call graph for get_contiguous_execution_blocks (caller -> callee):

- nvrtc_kernel%create -> get_contiguous_execution_blocks
- nvrtc_kernel%execute -> get_contiguous_execution_blocks
- transpose_handle_cuda%create -> nvrtc_kernel%create
- backend_mpi%execute_mpi -> nvrtc_kernel%execute
- backend_nccl%execute_nccl -> nvrtc_kernel%execute
- abstract_backend%execute -> nvrtc_kernel%execute
- transpose_handle_cuda%execute -> nvrtc_kernel%execute
- transpose_plan_cuda%execute_cuda -> transpose_handle_cuda%execute
- run_autotune_backend -> transpose_handle_cuda%execute
- autotune_grid -> run_autotune_backend
- transpose_plan_cuda%create_cuda -> run_autotune_backend
- transpose_plan_cuda%create_cuda -> autotune_grid_decomposition
- autotune_grid_decomposition -> autotune_grid