evaluate_analytical_performance Function

public function evaluate_analytical_performance(dims, transpose_type, config, props, base_storage) result(score)

This function evaluates the performance of a kernel configuration based on various architectural and problem-specific parameters.

Arguments

Type | Intent | Optional | Attributes | Name
integer(kind=int32), intent(in) :: dims(:)

Problem dimensions

type(dtfft_transpose_t), intent(in) :: transpose_type

Type of transposition to perform

type(kernel_config), intent(in) :: config

Kernel configuration

type(device_props), intent(in) :: props

GPU architecture properties

integer(kind=int64), intent(in) :: base_storage

Number of bytes needed to store a single element

Return Value: real(kind=real32)

Performance score


Calls

Call graph of evaluate_analytical_performance (caller -> callee):

- evaluate_analytical_performance -> count_bank_conflicts
- evaluate_analytical_performance -> estimate_bank_conflict_ratio
- evaluate_analytical_performance -> estimate_coalescing
- evaluate_analytical_performance -> estimate_occupancy
- estimate_bank_conflict_ratio -> count_bank_conflicts

Called by

Called-by graph of evaluate_analytical_performance (caller -> callee):

- get_kernel -> evaluate_analytical_performance
- nvrtc_kernel%create -> get_kernel
- transpose_handle_cuda%create -> nvrtc_kernel%create