Skip to content

Commit

Permalink
Merge pull request #250 from intel/metric_corrections_tma_5.01
Browse files Browse the repository at this point in the history
Metric corrections tma 5.01
  • Loading branch information
perrytaylor authored Dec 2, 2024
2 parents d8859e3 + 60ac84b commit 9051dd9
Show file tree
Hide file tree
Showing 38 changed files with 2,129 additions and 965 deletions.
36 changes: 18 additions & 18 deletions ADL/metrics/alderlake_metrics_goldencove_core.json

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions ADL/metrics/perf/alderlake_metrics_goldencove_core_perf.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
"MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_big_code",
"MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
Expand Down Expand Up @@ -89,7 +89,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
"MetricExpr": "100 - ( tma_big_code + tma_instruction_fetch_bw + tma_mispredictions + tma_cache_memory_bandwidth + tma_cache_memory_latency + tma_memory_data_tlbs + tma_memory_synchronization + tma_compute_bound_est + tma_irregular_overhead + tma_branching_overhead + tma_useful_work )",
"MetricExpr": "100 - ( tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work )",
"MetricGroup": "BvOB;Cor;Offcore;TopdownL1;tma_L1_group;Default;Scaled_Slots",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
Expand Down Expand Up @@ -1099,7 +1099,7 @@
"MetricExpr": "100 * ( 1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1 ) if tma_info_system_smt_2t_utilization > 0.5 else 0",
"MetricGroup": "Cor;SMT;Metric",
"MetricName": "tma_info_botlnk_l0_core_bound_likely",
"MetricThreshold": "tma_info_botlnk_core_bound_likely > 0.5"
"MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
},
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
Expand Down Expand Up @@ -1142,7 +1142,7 @@
},
{
"BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
"MetricExpr": "tma_info_thread_slots / ( TOPDOWN.SLOTS / 2 ) if #SMT_on else 1",
"MetricExpr": "tma_info_thread_slots / ( slots / 2 ) if #SMT_on else 1",
"MetricGroup": "SMT;TmaL1;TopdownL1;tma_L1_group;Metric",
"MetricName": "tma_info_thread_slots_utilization"
},
Expand Down Expand Up @@ -1361,7 +1361,7 @@
"MetricExpr": "IDQ.DSB_UOPS / ( UOPS_ISSUED.ANY )",
"MetricGroup": "DSB;Fed;FetchBW;Metric;tma_issueFB",
"MetricName": "tma_info_frontend_dsb_coverage",
"MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & ipc / 6 > 0.35",
"MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
"PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp."
},
{
Expand Down Expand Up @@ -1419,23 +1419,23 @@
"MetricExpr": "100 * ( tma_fetch_latency * tma_dsb_switches / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_mite / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) )",
"MetricGroup": "DSBmiss;Fed;Scaled_Slots;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_misses",
"MetricThreshold": "tma_info_botlnk_dsb_misses > 10",
"MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
"PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp."
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
"MetricExpr": "100 * ( tma_frontend_bound * ( tma_fetch_bandwidth / ( tma_fetch_latency + tma_fetch_bandwidth ) ) * ( tma_dsb / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) )",
"MetricGroup": "DSB;Fed;FetchBW;Scaled_Slots;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
"MetricThreshold": "tma_info_botlnk_dsb_bandwidth > 10",
"MetricThreshold": "tma_info_botlnk_l2_dsb_bandwidth > 10",
"PublicDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp."
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
"MetricExpr": "100 * ( tma_fetch_latency * tma_icache_misses / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) )",
"MetricGroup": "Fed;FetchLat;IcMiss;Scaled_Slots;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
"MetricThreshold": "tma_info_botlnk_ic_misses > 5",
"MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
"PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck."
},
{
Expand Down Expand Up @@ -1475,7 +1475,7 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Cycles representing fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
"MetricExpr": "tma_mispredictions * tma_info_thread_slots / ( 6 ) / BR_MISP_RETIRED.ALL_BRANCHES / 100",
"MetricExpr": "tma_bottleneck_mispredictions * tma_info_thread_slots / ( 6 ) / BR_MISP_RETIRED.ALL_BRANCHES / 100",
"MetricGroup": "Bad;BrMispredicts;Core_Metric;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
"PublicDescription": "Branch Misprediction Cost: Cycles representing fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_bottleneck_mispredictions, tma_branch_mispredicts, tma_mispredicts_resteers."
Expand Down Expand Up @@ -1618,7 +1618,7 @@
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING ) / ( 4 * tma_info_core_core_clks )",
"MetricGroup": "Mem;MemoryTLB;Core_Metric",
"MetricName": "tma_info_memory_tlb_page_walks_utilization",
"MetricThreshold": "tma_info_memory_page_walks_utilization > 0.5"
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
Expand Down Expand Up @@ -1667,7 +1667,7 @@
"MetricExpr": "L2_LINES_OUT.USELESS_HWPF / ( L2_LINES_OUT.SILENT + L2_LINES_OUT.NON_SILENT )",
"MetricGroup": "Prefetches;Metric",
"MetricName": "tma_info_memory_prefetches_useless_hwpf",
"MetricThreshold": "tma_info_memory_useless_hwpf > 0.15"
"MetricThreshold": "tma_info_memory_prefetches_useless_hwpf > 0.15"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
Expand Down
38 changes: 19 additions & 19 deletions ARL/metrics/arrowlake_metrics_lioncove_core.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"Header": {
"Copyright": "Copyright (c) 2001 - 2024 Intel Corporation. All rights reserved.",
"Info": "Performance Monitoring Metrics for Intel(R) Core(TM) processors based on Arrow Lake performance hybrid architecture",
"DatePublished": "11/15/2024",
"DatePublished": "12/02/2024",
"Version": "1.0",
"Legend": "",
"TmaVersion": "5.01",
Expand Down Expand Up @@ -319,7 +319,7 @@
],
"Constants": [],
"Formula": "100 * ( ( a / ( a + b + c + d ) ) - ( 1 - ( 10 * ( e / ( f ) ) * ( max( ( g / ( a + b + c + d ) ) * ( 1 - h / ( i - j ) ) , 0.0001 ) ) / ( g / ( a + b + c + d ) ) ) ) * ( k / ( a + b + c + d ) ) * ( ( ( g / ( a + b + c + d ) ) / ( b / ( a + b + c + d ) ) ) * l / ( m ) ) / ( ( n / ( m ) ) + ( o / ( m ) ) + ( l / ( m ) + ( p / ( m ) ) ) + ( ( 3 ) * q / ( m ) ) + ( r / ( m ) ) + ( s / ( m ) ) ) - ( ( 1 - t / u ) * ( ( k / ( a + b + c + d ) ) * ( ( ( 3 ) * q / ( m ) ) + ( l / ( m ) + ( p / ( m ) ) ) * ( ( ( 1 - ( ( g / ( a + b + c + d ) ) / ( b / ( a + b + c + d ) ) ) ) * l / ( m ) ) + ( ( ( g / ( a + b + c + d ) ) / ( b / ( a + b + c + d ) ) ) * l / ( m ) ) * ( max( ( g / ( a + b + c + d ) ) * ( 1 - h / ( i - j ) ) , 0.0001 ) ) / ( g / ( a + b + c + d ) ) ) / ( ( ( ( g / ( a + b + c + d ) ) / ( b / ( a + b + c + d ) ) ) * l / ( m ) ) + ( ( 1 - ( ( g / ( a + b + c + d ) ) / ( b / ( a + b + c + d ) ) ) ) * l / ( m ) ) + ( p / ( m ) ) ) ) / ( ( n / ( m ) ) + ( o / ( m ) ) + ( l / ( m ) + ( p / ( m ) ) ) + ( ( 3 ) * q / ( m ) ) + ( r / ( m ) ) + ( s / ( m ) ) ) + ( max( 0 , ( a / ( a + b + c + d ) ) - ( k / ( a + b + c + d ) ) ) ) * ( v / ( m ) ) / ( ( ( w / ( m ) + x / ( y + x ) * ( z - a_a ) ) / ( m ) ) + ( ( a_b + y / ( y + x ) * ( z - a_a ) ) / ( m ) ) + ( a_c / ( m ) ) + ( v / ( m ) ) ) ) ) ) - ( 100 * ( k / ( a + b + c + d ) ) * ( ( o / ( m ) ) + ( n / ( m ) ) + ( p / ( m ) ) ) / ( ( n / ( m ) ) + ( o / ( m ) ) + ( l / ( m ) + ( p / ( m ) ) ) + ( ( 3 ) * q / ( m ) ) + ( r / ( m ) ) + ( s / ( m ) ) ) )",
"BaseFormula": " 100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - inst_retired.rep_iteration / uops_retired.ms:c1 ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_big_code",
"BaseFormula": " 100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - inst_retired.rep_iteration / uops_retired.ms:c1 ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code",
"Category": "TMA",
"CountDomain": "Scaled_Slots",
"Threshold": {
Expand Down Expand Up @@ -8234,11 +8234,11 @@
},
{
"Alias": "b",
"Value": "IPC"
"Value": "metric_TMA_Info_Thread_IPC"
}
],
"Formula": "a < 0.7 & b / 8 > 0.35",
"BaseFormula": "metric_TMA_Info_Frontend_DSB_Coverage < 0.7 & IPC / 8 > 0.35",
"BaseFormula": "metric_TMA_Info_Frontend_DSB_Coverage < 0.7 & metric_TMA_Info_Thread_IPC / 8 > 0.35",
"ThresholdIssues": "$issueFB"
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -8706,11 +8706,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Botlnk_DSB_Misses"
"Value": "metric_TMA_Info_Botlnk_L2_DSB_Misses"
}
],
"Formula": "a > 10",
"BaseFormula": "metric_TMA_Info_Botlnk_DSB_Misses > 10",
"BaseFormula": "metric_TMA_Info_Botlnk_L2_DSB_Misses > 10",
"ThresholdIssues": "$issueFB"
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -8790,11 +8790,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Botlnk_DSB_Bandwidth"
"Value": "metric_TMA_Info_Botlnk_L2_DSB_Bandwidth"
}
],
"Formula": "a > 10",
"BaseFormula": "metric_TMA_Info_Botlnk_DSB_Bandwidth > 10",
"BaseFormula": "metric_TMA_Info_Botlnk_L2_DSB_Bandwidth > 10",
"ThresholdIssues": "$issueFB"
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -8870,11 +8870,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Botlnk_IC_Misses"
"Value": "metric_TMA_Info_Botlnk_L2_IC_Misses"
}
],
"Formula": "a > 5",
"BaseFormula": "metric_TMA_Info_Botlnk_IC_Misses > 5",
"BaseFormula": "metric_TMA_Info_Botlnk_L2_IC_Misses > 5",
"ThresholdIssues": "$issueFL"
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -9171,7 +9171,7 @@
],
"Constants": [],
"Formula": "( 100 * ( 1 - ( 10 * ( a / ( b ) ) * ( max( ( c / ( d + e + f + g ) ) * ( 1 - h / ( i - j ) ) , 0.0001 ) ) / ( c / ( d + e + f + g ) ) ) ) * ( ( c / ( d + e + f + g ) ) + ( k / ( d + e + f + g ) ) * ( ( ( c / ( d + e + f + g ) ) / ( e / ( d + e + f + g ) ) ) * l / ( m ) ) / ( ( n / ( m ) ) + ( o / ( m ) ) + ( l / ( m ) + ( p / ( m ) ) ) + ( ( 3 ) * q / ( m ) ) + ( r / ( m ) ) + ( s / ( m ) ) ) ) ) * ( b ) / ( 8 ) / h / 100",
"BaseFormula": " tma_mispredictions * tma_info_thread_slots / ( 8 ) / br_misp_retired.all_branches / 100",
"BaseFormula": " tma_bottleneck_mispredictions * tma_info_thread_slots / ( 8 ) / br_misp_retired.all_branches / 100",
"Category": "TMA",
"CountDomain": "Core_Metric",
"Threshold": {
Expand Down Expand Up @@ -9966,11 +9966,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Memory_Page_Walks_Utilization"
"Value": "metric_TMA_Info_Memory_TLB_Page_Walks_Utilization"
}
],
"Formula": "a > 0.5",
"BaseFormula": "metric_TMA_Info_Memory_Page_Walks_Utilization > 0.5",
"BaseFormula": "metric_TMA_Info_Memory_TLB_Page_Walks_Utilization > 0.5",
"ThresholdIssues": ""
},
"ResolutionLevels": "CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -10096,11 +10096,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Memory_Load_STLB_Miss_Ret"
"Value": "metric_TMA_Info_Memory_TLB_Load_STLB_Miss_Ret"
}
],
"Formula": "a > 0.05",
"BaseFormula": "metric_TMA_Info_Memory_Load_STLB_Miss_Ret > 0.05",
"BaseFormula": "metric_TMA_Info_Memory_TLB_Load_STLB_Miss_Ret > 0.05",
"ThresholdIssues": ""
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -10136,11 +10136,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Memory_Store_STLB_Miss_Ret"
"Value": "metric_TMA_Info_Memory_TLB_Store_STLB_Miss_Ret"
}
],
"Formula": "a > 0.05",
"BaseFormula": "metric_TMA_Info_Memory_Store_STLB_Miss_Ret > 0.05",
"BaseFormula": "metric_TMA_Info_Memory_TLB_Store_STLB_Miss_Ret > 0.05",
"ThresholdIssues": ""
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down Expand Up @@ -10176,11 +10176,11 @@
"ThresholdMetrics": [
{
"Alias": "a",
"Value": "metric_TMA_Info_Memory_Useless_HWPF"
"Value": "metric_TMA_Info_Memory_Prefetches_Useless_HWPF"
}
],
"Formula": "a > 0.15",
"BaseFormula": "metric_TMA_Info_Memory_Useless_HWPF > 0.15",
"BaseFormula": "metric_TMA_Info_Memory_Prefetches_Useless_HWPF > 0.15",
"ThresholdIssues": ""
},
"ResolutionLevels": "THREAD, CORE, SOCKET, SYSTEM",
Expand Down
Loading

0 comments on commit 9051dd9

Please sign in to comment.