diff --git a/src/caliper/ConfigManager.cpp b/src/caliper/ConfigManager.cpp
index 2b3f20af..bf3d36f8 100644
--- a/src/caliper/ConfigManager.cpp
+++ b/src/caliper/ConfigManager.cpp
@@ -37,6 +37,7 @@
 extern const char* builtin_umpire_option_specs;
 extern const char* builtin_kokkos_option_specs;
 extern const char* builtin_papi_hsw_option_specs;
+extern const char* builtin_papi_skl_option_specs;
 extern const char* builtin_papi_spr_option_specs;
 
 extern void add_submodule_controllers_and_services();
@@ -1380,7 +1381,7 @@ struct ConfigManager::ConfigManagerImpl {
     ConfigManagerImpl() : builtin_option_specs_list({
 #ifdef CALIPER_HAVE_GOTCHA
-        builtin_gotcha_option_specs,
+        builtin_gotcha_option_specs,
 #endif
 #ifdef CALIPER_HAVE_MPI
         builtin_mpi_option_specs,
@@ -1406,13 +1407,17 @@ struct ConfigManager::ConfigManagerImpl {
 #ifdef CALIPER_HAVE_KOKKOS
         builtin_kokkos_option_specs,
 #endif
-        builtin_base_option_specs
+        builtin_base_option_specs
     })
     {
 #ifdef CALIPER_HAVE_PAPI
 #ifdef CALIPER_HAVE_ARCH
-        if (std::string(CALIPER_HAVE_ARCH) == "sapphirerapids") {
+        std::string cali_arch = CALIPER_HAVE_ARCH;
+        Log(2).stream() << "ConfigManager: detected architecture " << cali_arch << std::endl;
+        if (cali_arch == "sapphirerapids") {
             builtin_option_specs_list.push_back(builtin_papi_spr_option_specs);
+        } else if (cali_arch == "skylake" || cali_arch == "skylake_avx512" || cali_arch == "cascadelake") {
+            builtin_option_specs_list.push_back(builtin_papi_skl_option_specs);
         } else {
             builtin_option_specs_list.push_back(builtin_papi_hsw_option_specs);
         }
diff --git a/src/caliper/controllers/controllers.cpp b/src/caliper/controllers/controllers.cpp
index 6bd4eeea..0a8a1312 100644
--- a/src/caliper/controllers/controllers.cpp
+++ b/src/caliper/controllers/controllers.cpp
@@ -1189,6 +1189,81 @@ const char* builtin_papi_hsw_option_specs = R"json(
 ]
 )json";
 
+const char* builtin_papi_skl_option_specs = R"json(
+[
+{
+ "name" : "topdown.toplevel",
+ "description" : "Top-down analysis for Intel CPUs (top level)",
+ "type" : "bool",
+ "category" : "metric",
+ "services" : [ "topdown" ],
+ "config" : { "CALI_TOPDOWN_LEVEL": "top" },
+ "query" :
+ [
+  { "level": "local", "select":
+   [
+    "any(topdown.retiring) as \"Retiring\"",
+    "any(topdown.backend_bound) as \"Backend bound\"",
+    "any(topdown.frontend_bound) as \"Frontend bound\"",
+    "any(topdown.bad_speculation) as \"Bad speculation\""
+   ]
+  },
+  { "level": "cross", "select":
+   [
+    "any(any#topdown.retiring) as \"Retiring\"",
+    "any(any#topdown.backend_bound) as \"Backend bound\"",
+    "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+    "any(any#topdown.bad_speculation) as \"Bad speculation\""
+   ]
+  }
+ ]
+},
+{
+ "name" : "topdown.all",
+ "description" : "Top-down analysis for Intel CPUs (all levels)",
+ "type" : "bool",
+ "category" : "metric",
+ "services" : [ "topdown" ],
+ "config" : { "CALI_TOPDOWN_LEVEL": "all" },
+ "query" :
+ [
+  { "level": "local", "select":
+   [
+    "any(topdown.retiring) as \"Retiring\"",
+    "any(topdown.light_operations) as \"Light operations\"",
+    "any(topdown.heavy_operations) as \"Heavy operations\"",
+    "any(topdown.backend_bound) as \"Backend bound\"",
+    "any(topdown.memory_bound) as \"Memory bound\"",
+    "any(topdown.core_bound) as \"Core bound\"",
+    "any(topdown.frontend_bound) as \"Frontend bound\"",
+    "any(topdown.fetch_latency) as \"Fetch latency\"",
+    "any(topdown.fetch_bandwidth) as \"Fetch bandwidth\"",
+    "any(topdown.bad_speculation) as \"Bad speculation\"",
+    "any(topdown.branch_mispredicts) as \"Branch mispredicts\"",
"any(topdown.machine_clears) as \"Machine clears\"" + ] + }, + { "level": "cross", "select": + [ + "any(any#topdown.retiring) as \"Retiring\"", + "any(any#topdown.light_operations) as \"Light operations\"", + "any(any#topdown.heavy_operations) as \"Heavy operations\"", + "any(any#topdown.backend_bound) as \"Backend bound\"", + "any(any#topdown.memory_bound) as \"Memory bound\"", + "any(any#topdown.core_bound) as \"Core bound\"", + "any(any#topdown.frontend_bound) as \"Frontend bound\"", + "any(any#topdown.fetch_latency) as \"Fetch latency\"", + "any(any#topdown.fetch_bandwidth) as \"Fetch bandwidth\"", + "any(any#topdown.bad_speculation) as \"Bad speculation\"", + "any(any#topdown.branch_mispredicts) as \"Branch mispredicts\"", + "any(any#topdown.machine_clears) as \"Machine clears\"" + ] + } + ] +} +] +)json"; + #ifdef CALIPER_WITH_PAPI_RDPMC const char* builtin_papi_spr_option_specs = R"json( [ @@ -1261,60 +1336,6 @@ const char* builtin_papi_spr_option_specs = R"json( ] } ] - }, - { - "name" : "topdown-counters.toplevel", - "description" : "Raw counter values for Intel top-down analysis (top level)", - "type" : "bool", - "category" : "metric", - "services" : [ "papi" ], - "config" : - { - "CALI_PAPI_COUNTERS": - "perf::slots,perf::topdown-retiring" - }, - "query" : - [ - { "level": "local", "select": - [ - "inclusive_sum(sum#papi.slots) as slots", - "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring" - ] - }, - { "level": "cross", "select": - [ - "sum(inclusive#sum#papi.slots) as slots", - "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring" - ] - } - ] - }, - { - "name" : "topdown-counters.all", - "description" : "Raw counter values for Intel top-down analysis (all levels)", - "type" : "bool", - "category" : "metric", - "services" : [ "papi" ], - "config" : - { - "CALI_PAPI_COUNTERS": - "perf::slots,perf::topdown-retiring" - }, - "query" : - [ - { "level": "local", "select": - [ - "inclusive_sum(sum#papi.slots) as slots", - "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring" - ] - }, - { "level": "cross", "select": - [ - "sum(inclusive#sum#papi.slots) as slots", - "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring" - ] - } - ] } ] )json"; @@ -1390,84 +1411,6 @@ const char* builtin_papi_spr_option_specs = R"json( ] } ] - }, - { - "name" : "topdown-counters.toplevel", - "description" : "Raw counter values for Intel top-down analysis (top level)", - "type" : "bool", - "category" : "metric", - "services" : [ "papi" ], - "config" : - { - "CALI_PAPI_COUNTERS": - "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING" - }, - "query" : - [ - { "level": "local", "select": - [ - "inclusive_sum(sum#papi.perf::slots) as slots", - "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring", - "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec", - "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound", - "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound", - "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping" - ] - }, - { "level": "cross", "select": - [ - "sum(inclusive#sum#papi.perf::slots) as slots", - "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring", - "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec", - "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound", - "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound", - 
"sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping" - ] - } - ] - }, - { - "name" : "topdown-counters.all", - "description" : "Raw counter values for Intel top-down analysis (all levels)", - "type" : "bool", - "category" : "metric", - "services" : [ "papi" ], - "config" : - { - "CALI_PAPI_COUNTERS": - "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING,perf_raw::r8400,perf_raw::r8500,perf_raw::r8600,perf_raw::r8700" - }, - "query" : - [ - { "level": "local", "select": - [ - "inclusive_sum(sum#papi.perf::slots) as slots", - "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring", - "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec", - "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound", - "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound", - "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping", - "inclusive_sum(sum#papi.perf_raw::r8400) as topdown_heavy_ops", - "inclusive_sum(sum#papi.perf_raw::r8500) as topdown_br_mispredict", - "inclusive_sum(sum#papi.perf_raw::r8600) as topdown_fetch_lat", - "inclusive_sum(sum#papi.perf_raw::r8700) as topdown_mem_bound" - ] - }, - { "level": "cross", "select": - [ - "sum(inclusive#sum#papi.perf::slots) as slots", - "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring", - "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec", - "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound", - "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound", - "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping", - "sum(inclusive#sum#papi.perf_raw::r8400) as topdown_heavy_ops", - "sum(inclusive#sum#papi.perf_raw::r8500) as topdown_br_mispredict", - "sum(inclusive#sum#papi.perf_raw::r8600) as topdown_fetch_lat", - "sum(inclusive#sum#papi.perf_raw::r8700) as topdown_mem_bound" - ] - } - ] } ] )json"; diff --git a/src/services/topdown/CMakeLists.txt b/src/services/topdown/CMakeLists.txt index a5e59717..c25e4663 100644 --- a/src/services/topdown/CMakeLists.txt +++ b/src/services/topdown/CMakeLists.txt @@ -1,7 +1,8 @@ set(CALIPER_TOPDOWN_SOURCES IntelTopdown.cpp TopdownCalculator.cpp - HaswellTopdown.cpp) + HaswellTopdown.cpp + SkylakeTopdown.cpp) if (WITH_PAPI_RDPMC) diff --git a/src/services/topdown/IntelTopdown.cpp b/src/services/topdown/IntelTopdown.cpp index a9d28198..6c912969 100644 --- a/src/services/topdown/IntelTopdown.cpp +++ b/src/services/topdown/IntelTopdown.cpp @@ -10,6 +10,7 @@ #include "HaswellTopdown.h" #include "SapphireRapidsTopdown.h" +#include "SkylakeTopdown.h" #include "caliper/SnapshotRecord.h" @@ -41,18 +42,12 @@ class IntelTopdown unsigned num_ret_computed; unsigned num_ret_skipped; - topdown::IntelTopdownLevel m_level; + topdown::IntelTopdownLevel m_level; std::shared_ptr m_calculator; - bool find_counter_attrs(CaliperMetadataAccessInterface& db) - { - return m_calculator->find_counter_attrs(db); - } + bool find_counter_attrs(CaliperMetadataAccessInterface& db) { return m_calculator->find_counter_attrs(db); } - void make_result_attrs(CaliperMetadataAccessInterface& db) - { - m_calculator->make_result_attrs(db); - } + void make_result_attrs(CaliperMetadataAccessInterface& db) { m_calculator->make_result_attrs(db); } void postprocess_snapshot_cb(std::vector& rec) { @@ -139,12 +134,9 @@ class IntelTopdown num_bsp_skipped(0), m_level(calculator->get_level()), m_calculator(calculator) - { - } + {} - 
-    ~IntelTopdown()
-    {
-    }
+    ~IntelTopdown() {}
 
 public:
 
@@ -167,11 +159,15 @@ class IntelTopdown
     std::shared_ptr<topdown::TopdownCalculator> calculator;
 
 #if defined(CALIPER_HAVE_ARCH)
-    if (std::string(CALIPER_HAVE_ARCH) == "sapphirerapids") {
+    std::string cali_arch = CALIPER_HAVE_ARCH;
+    if (cali_arch == "sapphirerapids") {
         calculator = std::shared_ptr<topdown::TopdownCalculator>(new topdown::SapphireRapidsTopdown(level));
+    } else if (cali_arch == "skylake" || cali_arch == "skylake_avx512" || cali_arch == "cascadelake") {
+        calculator = std::shared_ptr<topdown::TopdownCalculator>(new topdown::SkylakeTopdown(level));
     } else {
 #endif
-        calculator = std::shared_ptr<topdown::TopdownCalculator>(new topdown::HaswellTopdown(level)); // Default type of calculation
+        calculator = std::shared_ptr<topdown::TopdownCalculator>(new topdown::HaswellTopdown(level)
+        ); // Default type of calculation
 #if defined(CALIPER_HAVE_ARCH)
     }
 #endif
diff --git a/src/services/topdown/SkylakeTopdown.cpp b/src/services/topdown/SkylakeTopdown.cpp
new file mode 100644
index 00000000..d284ffd7
--- /dev/null
+++ b/src/services/topdown/SkylakeTopdown.cpp
@@ -0,0 +1,306 @@
+#include "SkylakeTopdown.h"
+
+#include "../Services.h"
+
+#include "caliper/common/Log.h"
+#include "caliper/common/Variant.h"
+
+namespace cali
+{
+namespace topdown
+{
+
+SkylakeTopdown::SkylakeTopdown(IntelTopdownLevel level)
+    : cali::topdown::TopdownCalculator(
+          level,
+          // top_counters
+          "IDQ_UOPS_NOT_DELIVERED:CORE"
+          ",UOPS_ISSUED:ANY"
+          ",UOPS_RETIRED:RETIRE_SLOTS"
+          ",INT_MISC:RECOVERY_CYCLES"
+          ",CPU_CLK_UNHALTED:THREAD_P",
+          // all_counters
+          "IDQ_UOPS_NOT_DELIVERED:CORE"
+          ",UOPS_ISSUED:ANY"
+          ",UOPS_RETIRED:RETIRE_SLOTS"
+          ",INT_MISC:RECOVERY_CYCLES"
+          ",CPU_CLK_UNHALTED:THREAD_P"
+          ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
+          ",BR_MISP_RETIRED:ALL_BRANCHES"
+          ",MACHINE_CLEARS:COUNT"
+          ",CYCLE_ACTIVITY:STALLS_MEM_ANY"
+          ",EXE_ACTIVITY:BOUND_ON_STORES"
+          ",CYCLE_ACTIVITY:STALLS_TOTAL"
+          ",EXE_ACTIVITY:1_PORTS_UTIL"
+          ",EXE_ACTIVITY:2_PORTS_UTIL",
+          // Note: PAPI doesn't seem to have UOPS_RETIRED.MACRO_FUSED,
+          // so we can't currently calculate L2 metrics under retiring.
+          // The commented counters below are unique to these metrics.
+          // ",UOPS_RETIRED:MACRO_FUSED"
+          // ",INST_RETIRED:ANY_P"
+          // res_top
+          { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
+          // res_all
+          { "retiring",
+            // "light_operations",
+            // "heavy_operations",
+            "backend_bound",
+            "memory_bound",
+            "core_bound",
+            "frontend_bound",
+            "fetch_latency",
+            "fetch_bandwidth",
+            "bad_speculation",
+            "branch_mispredicts",
+            "machine_clears" }
+      )
+{}
+
+bool SkylakeTopdown::setup_config(Caliper& c, Channel& channel) const
+{
+    channel.config().set("CALI_PAPI_COUNTERS", m_level == All ? m_all_counters : m_top_counters);
+    channel.config().set("CALI_PAPI_ENABLE_MULTIPLEXING", "true");
+    if (!cali::services::register_service(&c, &channel, "papi")) {
+        Log(0).stream() << channel.name() << ": topdown: Unable to register papi service, skipping topdown"
+                        << std::endl;
+        return false;
+    }
+    return true;
+}
+
+std::vector<Entry> SkylakeTopdown::compute_toplevel(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    Variant v_idq_uops_not_delivered_core = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
+    Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
+    Variant v_uops_retired_retire_slots = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
+    Variant v_int_misc_recovery_cycles = get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
+    Variant v_cpu_clk_unhalted_thread = get_val_from_rec(rec, "CPU_CLK_UNHALTED:THREAD_P");
+
+    bool is_incomplete = v_idq_uops_not_delivered_core.empty() || v_uops_issued_any.empty()
+                         || v_uops_retired_retire_slots.empty() || v_int_misc_recovery_cycles.empty()
+                         || v_cpu_clk_unhalted_thread.empty();
+    bool is_nonzero = v_idq_uops_not_delivered_core.to_double() > 0.0 && v_uops_issued_any.to_double() > 0.0
+                      && v_uops_retired_retire_slots.to_double() > 0.0 && v_int_misc_recovery_cycles.to_double() > 0.0
+                      && v_cpu_clk_unhalted_thread.to_double() > 0.0;
+
+    double thread_slots = 4 * v_cpu_clk_unhalted_thread.to_double();
+
+    if (is_incomplete || !is_nonzero || thread_slots < 1.0) {
+        return ret;
+    }
+
+    double frontend_bound = std::max(v_idq_uops_not_delivered_core.to_double() / thread_slots, 0.0);
+    double bad_speculation = std::max(
+        (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double()
+         + 4 * v_int_misc_recovery_cycles.to_double())
+            / thread_slots,
+        0.0
+    );
+    double backend_bound = std::max(
+        1 - frontend_bound
+            - (v_uops_issued_any.to_double() + 4 * v_int_misc_recovery_cycles.to_double()) / thread_slots,
+        0.0
+    );
+    double retiring = std::max(v_uops_retired_retire_slots.to_double() / thread_slots, 0.0);
+
+    ret.reserve(4);
+    ret.push_back(Entry(m_result_attrs["retiring"], Variant(retiring)));
+    ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(backend_bound)));
+    ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(frontend_bound)));
+    ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(bad_speculation)));
+
+    return ret;
+}
+
+std::size_t SkylakeTopdown::get_num_expected_toplevel() const
+{
+    return 4;
+}
+
+std::vector<Entry> SkylakeTopdown::compute_retiring(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    // TODO uncomment when we can figure out the raw counter corresponding to
+    // UOPS_RETIRED:MACRO_FUSED
+
+    // Variant v_uops_retired_retire_slots = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
+    // Variant v_uops_retired_macro_fused = get_val_from_rec(rec, "UOPS_RETIRED:MACRO_FUSED");
+    // Variant v_inst_retired_any = get_val_from_rec(rec, "INST_RETIRED:ANY_P");
+    // Variant v_cpu_clk_unhalted_thread = get_val_from_rec(rec, "CPU_CLK_UNHALTED:THREAD_P");
+
+    // bool is_incomplete = v_uops_retired_retire_slots.empty() || v_uops_retired_macro_fused.empty()
+    //     || v_inst_retired_any.empty() || v_cpu_clk_unhalted_thread.empty();
+
+    // double thread_slots = 4 * v_cpu_clk_unhalted_thread.to_double();
+
+    // if (is_incomplete || !(thread_slots > 1.0)) {
+    //     return ret;
+    // }
+
+    // double retiring = std::max(v_uops_retired_retire_slots.to_double() / thread_slots, 0.0);
+
+    // double heavy_operations = std::max(
+    //     (v_uops_retired_retire_slots.to_double() + v_uops_retired_macro_fused.to_double()
+    //         - v_inst_retired_any.to_double())
+    //         / thread_slots,
+    //     0.0
+    // );
+
+    // ret.reserve(2);
+    // ret.push_back(Entry(m_result_attrs["heavy_operations"], Variant(heavy_operations)));
+    // ret.push_back(Entry(m_result_attrs["light_operations"], Variant(std::max(retiring - heavy_operations, 0.0))));
+
+    return ret;
+}
+
+std::size_t SkylakeTopdown::get_num_expected_retiring() const
+{
+    return 0;
+}
+
+std::vector<Entry> SkylakeTopdown::compute_backend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    Variant v_cycle_activity_stalls_mem_any = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_MEM_ANY");
+    Variant v_exe_activity_bound_on_stores = get_val_from_rec(rec, "EXE_ACTIVITY:BOUND_ON_STORES");
+    Variant v_cycle_activity_stalls_total = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_TOTAL");
+    Variant v_exe_activity_1_ports_util = get_val_from_rec(rec, "EXE_ACTIVITY:1_PORTS_UTIL");
+    Variant v_exe_activity_2_ports_util = get_val_from_rec(rec, "EXE_ACTIVITY:2_PORTS_UTIL");
+
+    Variant v_idq_uops_not_delivered_core = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
+    Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
+    Variant v_uops_retired_retire_slots = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
+    Variant v_int_misc_recovery_cycles = get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
+    Variant v_cpu_clk_unhalted_thread = get_val_from_rec(rec, "CPU_CLK_UNHALTED:THREAD_P");
+
+    bool is_incomplete = v_idq_uops_not_delivered_core.empty() || v_uops_issued_any.empty()
+                         || v_uops_retired_retire_slots.empty() || v_int_misc_recovery_cycles.empty()
+                         || v_cpu_clk_unhalted_thread.empty() || v_cycle_activity_stalls_mem_any.empty()
+                         || v_exe_activity_bound_on_stores.empty() || v_cycle_activity_stalls_total.empty()
+                         || v_exe_activity_1_ports_util.empty() || v_exe_activity_2_ports_util.empty();
+
+    double thread_slots = 4 * v_cpu_clk_unhalted_thread.to_double();
+
+    if (is_incomplete || !(thread_slots > 1.0)) {
+        return ret;
+    }
+
+    double frontend_bound = std::max(v_idq_uops_not_delivered_core.to_double() / thread_slots, 0.0);
+    double backend_bound = std::max(
+        1 - frontend_bound
+            - (v_uops_issued_any.to_double() + 4 * v_int_misc_recovery_cycles.to_double()) / thread_slots,
+        0.0
+    );
+    double retiring = std::max(v_uops_retired_retire_slots.to_double() / thread_slots, 0.0);
+
+    double memory_bound = std::max(
+        ((v_cycle_activity_stalls_mem_any.to_double() + v_exe_activity_bound_on_stores.to_double())
+         / (v_cycle_activity_stalls_total.to_double()
+            + (v_exe_activity_1_ports_util.to_double() + retiring * v_exe_activity_2_ports_util.to_double())
+            + v_exe_activity_bound_on_stores.to_double()))
+            * backend_bound,
+        0.0
+    );
+
+    ret.reserve(2);
+
+    ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(memory_bound)));
+    ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(backend_bound - memory_bound, 0.0))));
+
+    return ret;
+}
+
+std::size_t SkylakeTopdown::get_num_expected_backend_bound() const
+{
+    return 2;
+}
+
+std::vector<Entry> SkylakeTopdown::compute_frontend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    Variant v_idq_uops_not_delivered_cycles_0_uops_deliv_core =
+        get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE");
+    Variant v_idq_uops_not_delivered_core = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
+    Variant v_cpu_clk_unhalted_thread = get_val_from_rec(rec, "CPU_CLK_UNHALTED:THREAD_P");
+
+    bool is_incomplete = v_idq_uops_not_delivered_cycles_0_uops_deliv_core.empty()
+                         || v_idq_uops_not_delivered_core.empty() || v_cpu_clk_unhalted_thread.empty();
+
+    double thread_slots = 4 * v_cpu_clk_unhalted_thread.to_double();
+
+    if (is_incomplete || !(thread_slots > 1.0)) {
+        return ret;
+    }
+
+    double frontend_bound = std::max(v_idq_uops_not_delivered_core.to_double() / thread_slots, 0.0);
+    double fetch_latency =
+        std::max(4 * v_idq_uops_not_delivered_cycles_0_uops_deliv_core.to_double() / thread_slots, 0.0);
+
+    ret.reserve(2);
+
+    ret.push_back(Entry(m_result_attrs["fetch_latency"], Variant(fetch_latency)));
+    ret.push_back(Entry(m_result_attrs["fetch_bandwidth"], Variant(std::max(frontend_bound - fetch_latency, 0.0))));
+
+    return ret;
+}
+
+std::size_t SkylakeTopdown::get_num_expected_frontend_bound() const
+{
+    return 2;
+}
+
+std::vector<Entry> SkylakeTopdown::compute_bad_speculation(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    Variant v_br_misp_retired_all_branches = get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES");
+    Variant v_machine_clears_count = get_val_from_rec(rec, "MACHINE_CLEARS:COUNT");
+    Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
+    Variant v_uops_retired_retire_slots = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
+    Variant v_int_misc_recovery_cycles = get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
+    Variant v_cpu_clk_unhalted_thread = get_val_from_rec(rec, "CPU_CLK_UNHALTED:THREAD_P");
+
+    bool is_incomplete = v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty()
+                         || v_uops_issued_any.empty() || v_uops_retired_retire_slots.empty()
+                         || v_int_misc_recovery_cycles.empty() || v_cpu_clk_unhalted_thread.empty();
+
+    double thread_slots = 4 * v_cpu_clk_unhalted_thread.to_double();
+
+    if (is_incomplete || !(thread_slots > 1.0)) {
+        return ret;
+    }
+
+    double bad_speculation = std::max(
+        (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double()
+         + 4 * v_int_misc_recovery_cycles.to_double())
+            / thread_slots,
+        0.0
+    );
+    double branch_mispredicts = std::max(
+        (v_br_misp_retired_all_branches.to_double()
+         / (v_br_misp_retired_all_branches.to_double() + v_machine_clears_count.to_double()))
+            * bad_speculation,
+        0.0
+    );
+
+    ret.reserve(2);
+
+    ret.push_back(Entry(m_result_attrs["branch_mispredicts"], Variant(branch_mispredicts)));
+    ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(std::max(bad_speculation - branch_mispredicts, 0.0)))
+    );
+
+    return ret;
+}
+
+std::size_t SkylakeTopdown::get_num_expected_bad_speculation() const
+{
+    return 2;
+}
+
+} // namespace topdown
+} // namespace cali
\ No newline at end of file
diff --git a/src/services/topdown/SkylakeTopdown.h b/src/services/topdown/SkylakeTopdown.h
new file mode 100644
index 00000000..b16db579
--- /dev/null
+++ b/src/services/topdown/SkylakeTopdown.h
@@ -0,0 +1,41 @@
+#ifndef CALI_TOPDOWN_SKYLAKE_TOPDOWN_H
+#define CALI_TOPDOWN_SKYLAKE_TOPDOWN_H
+
+#include "TopdownCalculator.h"
+
+namespace cali
+{
+namespace topdown
+{
+
+// Topdown calculations for:
+//  * Skylake
+//  * Skylake-X
+//  * Cascade Lake
+//  * Cascade Lake X
+class SkylakeTopdown : public TopdownCalculator
+{
+public:
+
+    SkylakeTopdown(IntelTopdownLevel level);
+
+    virtual ~SkylakeTopdown() = default;
+
+    virtual bool setup_config(Caliper& c, Channel& channel) const override;
+
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override;
+    virtual std::size_t get_num_expected_toplevel() const override;
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override;
+    virtual std::size_t get_num_expected_retiring() const override;
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override;
+    virtual std::size_t get_num_expected_backend_bound() const override;
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override;
+    virtual std::size_t get_num_expected_frontend_bound() const override;
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override;
+    virtual std::size_t get_num_expected_bad_speculation() const override;
+};
+
+} // namespace topdown
+} // namespace cali
+
+#endif /* CALI_TOPDOWN_SKYLAKE_TOPDOWN_H */
\ No newline at end of file
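
Reviewer note, not part of the patch: the short standalone sketch below mirrors the top-level TMA arithmetic that SkylakeTopdown::compute_toplevel implements, so the formulas can be sanity-checked in isolation. The struct, function name, and sample counter values are hypothetical; real runs obtain these counters through Caliper's papi service.

    // Minimal, self-contained illustration of the Skylake/Cascade Lake top-level
    // topdown formulas (assumes 4 issue slots per unhalted core cycle, as in the patch).
    #include <algorithm>
    #include <cstdio>

    struct SkylakeCounters {
        double idq_uops_not_delivered_core; // IDQ_UOPS_NOT_DELIVERED:CORE
        double uops_issued_any;             // UOPS_ISSUED:ANY
        double uops_retired_retire_slots;   // UOPS_RETIRED:RETIRE_SLOTS
        double int_misc_recovery_cycles;    // INT_MISC:RECOVERY_CYCLES
        double cpu_clk_unhalted_thread;     // CPU_CLK_UNHALTED:THREAD_P
    };

    struct TopLevel {
        double retiring, bad_speculation, frontend_bound, backend_bound;
    };

    TopLevel compute_toplevel(const SkylakeCounters& c)
    {
        // Slot budget: 4 issue slots per unhalted thread cycle.
        double slots = 4.0 * c.cpu_clk_unhalted_thread;

        double frontend_bound = std::max(c.idq_uops_not_delivered_core / slots, 0.0);
        double bad_speculation = std::max(
            (c.uops_issued_any - c.uops_retired_retire_slots + 4.0 * c.int_misc_recovery_cycles) / slots, 0.0);
        double retiring = std::max(c.uops_retired_retire_slots / slots, 0.0);
        // Backend bound is the remainder of the slot budget.
        double backend_bound = std::max(
            1.0 - frontend_bound - (c.uops_issued_any + 4.0 * c.int_misc_recovery_cycles) / slots, 0.0);

        return { retiring, bad_speculation, frontend_bound, backend_bound };
    }

    int main()
    {
        // Made-up sample values: the four fractions should sum to 1.0.
        SkylakeCounters c { 2.0e8, 7.0e8, 6.0e8, 1.0e7, 2.5e8 };
        TopLevel t = compute_toplevel(c);
        std::printf("retiring=%.3f bad_spec=%.3f frontend=%.3f backend=%.3f\n",
                    t.retiring, t.bad_speculation, t.frontend_bound, t.backend_bound);
        return 0;
    }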