generate_candidates Subroutine

public subroutine generate_candidates(dims, tile_dim, other_dim, base_storage, props, candidates, num_candidates)

Generate kernel configuration candidates for a given problem.

Arguments

Type | Intent | Optional | Attributes | Name
integer(kind=int32), intent(in) :: dims(:)

Local dimensions of the input data, always 3D

integer(kind=int32), intent(in) :: tile_dim

Tile dimension

integer(kind=int32), intent(in) :: other_dim

Other dimension (not tiled)

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store a single element

type(device_props), intent(in) :: props

GPU architecture properties

type(kernel_config), intent(out) :: candidates(:)

Generated kernel configurations

integer(kind=int32), intent(out) :: num_candidates

Number of generated candidates


Calls

Call graph for generate_candidates:
generate_candidates -> estimate_memory_pressure
generate_candidates -> estimate_optimal_padding -> count_bank_conflicts
generate_candidates -> find_valid_combination

Called by

Called-by graph for generate_candidates:
transpose_handle_cuda%create -> nvrtc_kernel%create -> get_kernel -> generate_candidates