Skip to content

Commit

Permalink
Hybrid metric fix for cpu_core@ string. Replaces instances of "cpu@" with "cpu_core@".
Browse files (browse the repository at this point in the history)
  • Loading branch information
1perrytaylor committed Jan 15, 2025
1 parent 445a56f commit f131604
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 59 deletions.
34 changes: 17 additions & 17 deletions ADL/metrics/perf/alderlake_metrics_goldencove_core_perf.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
"MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code",
"MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
Expand Down Expand Up @@ -79,7 +79,7 @@
},
{
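"BriefDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments)",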
"MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )",
"MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )",
"MetricGroup": "Bad;BvIO;Cor;Ret;TopdownL1;tma_L1_group;Default;Scaled_Slots;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
Expand Down Expand Up @@ -242,7 +242,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
"MetricExpr": "( 3 ) * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks",
"MetricExpr": "( 3 ) * cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;Clocks_Estimated;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"ScaleUnit": "100%",
Expand Down Expand Up @@ -289,7 +289,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
"MetricExpr": "( cpu@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2",
"MetricExpr": "( cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group;Slots_Estimated;tma_issueD0",
"MetricName": "tma_decoder0_alone",
"ScaleUnit": "100%",
Expand All @@ -316,7 +316,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details",
"MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2",
"MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated",
"MetricName": "tma_ms",
"ScaleUnit": "100%",
Expand Down Expand Up @@ -404,7 +404,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
"MetricExpr": "min( ( 7 ) * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / tma_info_thread_clks",
"MetricExpr": "min( ( 7 ) * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / tma_info_thread_clks",
"MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_l1_bound_group;Clocks_Estimated;tma_issueTLB",
"MetricName": "tma_dtlb_load",
"ScaleUnit": "100%",
Expand Down Expand Up @@ -570,7 +570,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
"MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks",
"MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks",
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;Clocks;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"ScaleUnit": "100%",
Expand Down Expand Up @@ -633,7 +633,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
"MetricExpr": "( ( 7 ) * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / tma_info_core_core_clks",
"MetricExpr": "( ( 7 ) * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / tma_info_core_core_clks",
"MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_store_bound_group;Clocks_Estimated;tma_issueTLB",
"MetricName": "tma_dtlb_store",
"ScaleUnit": "100%",
Expand Down Expand Up @@ -1174,7 +1174,7 @@
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
"MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@",
"MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil;Metric",
"MetricName": "tma_info_core_ilp"
},
Expand Down Expand Up @@ -1301,13 +1301,13 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired",
"MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@",
"MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@",
"MetricGroup": "Pipeline;Ret;Metric",
"MetricName": "tma_info_pipeline_retire"
},
{
"BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
"MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@",
"MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@",
"MetricGroup": "MicroSeq;Pipeline;Ret;Metric",
"MetricName": "tma_info_pipeline_strings_cycles",
"MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1"
Expand All @@ -1322,7 +1322,7 @@
},
{
"BriefDescription": "",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT;Metric",
"MetricName": "tma_info_pipeline_execute"
},
Expand All @@ -1346,7 +1346,7 @@
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=0x1@",
"MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=0x1@",
"MetricGroup": "Fed;FetchBW;Metric",
"MetricName": "tma_info_frontend_fetch_upc"
},
Expand All @@ -1366,14 +1366,14 @@
},
{
"BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection",
"MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@",
"MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@",
"MetricGroup": "Fed;Metric",
"MetricName": "tma_info_frontend_unknown_branch_cost",
"PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node."
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@",
"MetricGroup": "DSBmiss;Metric",
"MetricName": "tma_info_frontend_dsb_switch_cost"
},
Expand All @@ -1385,7 +1385,7 @@
},
{
"BriefDescription": "Average Latency for L1 instruction cache misses",
"MetricExpr": "ICACHE_DATA.STALLS / cpu@ICACHE_DATA.STALLS\\,cmask\\=0x1\\,edge\\=0x1@",
"MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=0x1\\,edge\\=0x1@",
"MetricGroup": "Fed;FetchLat;IcMiss;Metric",
"MetricName": "tma_info_frontend_icache_miss_latency"
},
Expand Down Expand Up @@ -1683,7 +1683,7 @@
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@",
"MetricGroup": "Memory_BW;Offcore;Metric",
"MetricName": "tma_info_memory_latency_load_l2_mlp"
},
Expand Down
Loading

0 comments on commit f131604

Please sign in to comment.