diff --git a/ADL/metrics/perf/alderlake_metrics_goldencove_core_perf.json b/ADL/metrics/perf/alderlake_metrics_goldencove_core_perf.json index 3418ff89..d803dd8c 100644 --- a/ADL/metrics/perf/alderlake_metrics_goldencove_core_perf.json +++ b/ADL/metrics/perf/alderlake_metrics_goldencove_core_perf.json @@ -20,7 +20,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)", - "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", + "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", "MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots", "MetricName": "tma_bottleneck_instruction_fetch_bw", "MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20", @@ -79,7 +79,7 @@ }, { "BriefDescription": "Total pipeline cost of irregular execution (e.g", - "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", + "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", "MetricGroup": "Bad;BvIO;Cor;Ret;TopdownL1;tma_L1_group;Default;Scaled_Slots;tma_issueMS", "MetricName": "tma_bottleneck_irregular_overhead", "MetricThreshold": "tma_bottleneck_irregular_overhead > 10", @@ -242,7 +242,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)", - "MetricExpr": "( 3 ) * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks", + "MetricExpr": "( 3 ) * cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks", "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;Clocks_Estimated;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO", "MetricName": "tma_ms_switches", "ScaleUnit": "100%", @@ -289,7 +289,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder", - "MetricExpr": "( cpu@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2", + "MetricExpr": "( cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2", "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group;Slots_Estimated;tma_issueD0", "MetricName": "tma_decoder0_alone", "ScaleUnit": "100%", @@ -316,7 +316,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details", - "MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2", + "MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2", "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_ms", "ScaleUnit": "100%", @@ -404,7 +404,7 @@ }, { "BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses", - "MetricExpr": "min( ( 7 ) * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / tma_info_thread_clks", + "MetricExpr": "min( ( 7 ) * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / tma_info_thread_clks", "MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_l1_bound_group;Clocks_Estimated;tma_issueTLB", "MetricName": "tma_dtlb_load", "ScaleUnit": "100%", @@ -570,7 +570,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)", - "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", + "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", "MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;Clocks;tma_issueBW", "MetricName": "tma_mem_bandwidth", "ScaleUnit": "100%", @@ -633,7 +633,7 @@ }, { "BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses", - "MetricExpr": "( ( 7 ) * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / tma_info_core_core_clks", + "MetricExpr": "( ( 7 ) * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / tma_info_core_core_clks", "MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_store_bound_group;Clocks_Estimated;tma_issueTLB", "MetricName": "tma_dtlb_store", "ScaleUnit": "100%", @@ -1174,7 +1174,7 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)", - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", + "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil;Metric", "MetricName": "tma_info_core_ilp" }, @@ -1301,13 +1301,13 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired", - "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_retire" }, { "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "MicroSeq;Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_strings_cycles", "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1" @@ -1322,7 +1322,7 @@ }, { "BriefDescription": "", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )", "MetricGroup": "Cor;Pipeline;PortsUtil;SMT;Metric", "MetricName": "tma_info_pipeline_execute" }, @@ -1346,7 +1346,7 @@ }, { "BriefDescription": "Average number of Uops issued by front-end when it issued something", - "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=0x1@", + "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=0x1@", "MetricGroup": "Fed;FetchBW;Metric", "MetricName": "tma_info_frontend_fetch_upc" }, @@ -1366,14 +1366,14 @@ }, { "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection", - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "Fed;Metric", "MetricName": "tma_info_frontend_unknown_branch_cost", "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node." }, { "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "DSBmiss;Metric", "MetricName": "tma_info_frontend_dsb_switch_cost" }, @@ -1385,7 +1385,7 @@ }, { "BriefDescription": "Average Latency for L1 instruction cache misses", - "MetricExpr": "ICACHE_DATA.STALLS / cpu@ICACHE_DATA.STALLS\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "Fed;FetchLat;IcMiss;Metric", "MetricName": "tma_info_frontend_icache_miss_latency" }, @@ -1683,7 +1683,7 @@ }, { "BriefDescription": "Average Parallel L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", "MetricGroup": "Memory_BW;Offcore;Metric", "MetricName": "tma_info_memory_latency_load_l2_mlp" }, diff --git a/ARL/metrics/perf/arrowlake_metrics_lioncove_core_perf.json b/ARL/metrics/perf/arrowlake_metrics_lioncove_core_perf.json index 6a0776da..f036b240 100644 --- a/ARL/metrics/perf/arrowlake_metrics_lioncove_core_perf.json +++ b/ARL/metrics/perf/arrowlake_metrics_lioncove_core_perf.json @@ -20,7 +20,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)", - "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", + "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", "MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots", "MetricName": "tma_bottleneck_instruction_fetch_bw", "MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20", @@ -49,7 +49,7 @@ }, { "BriefDescription": "Total pipeline cost of irregular execution (e.g", - "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_microcode_sequencer + max( 0 , tma_heavy_operations - tma_microcode_sequencer ) ) ) * ( ( ( 99 *3 + 63 + 30 ) / 5 ) * ASSISTS.ANY / tma_info_thread_slots / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", + "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_microcode_sequencer + max( 0 , tma_heavy_operations - tma_microcode_sequencer ) ) ) * ( ( ( 99 *3 + 63 + 30 ) / 5 ) * ASSISTS.ANY / tma_info_thread_slots / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", "MetricGroup": "Bad;BvIO;Cor;Ret;TopdownL1;tma_L1_group;Default;Scaled_Slots;tma_issueMS", "MetricName": "tma_bottleneck_irregular_overhead", "MetricThreshold": "tma_bottleneck_irregular_overhead > 10", @@ -240,7 +240,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)", - "MetricExpr": "( cpu@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks + IDQ.MITE_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", + "MetricExpr": "( cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks + IDQ.MITE_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_mite", "ScaleUnit": "100%", @@ -249,7 +249,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline", - "MetricExpr": "( cpu@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", + "MetricExpr": "( cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_dsb", "ScaleUnit": "100%", @@ -258,7 +258,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit", - "MetricExpr": "cpu@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks", + "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks", "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_lsd", "ScaleUnit": "100%", @@ -577,7 +577,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)", - "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", + "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", "MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;Clocks;tma_issueBW", "MetricName": "tma_mem_bandwidth", "ScaleUnit": "100%", @@ -905,7 +905,7 @@ }, { "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors", - "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / ( tma_retiring * tma_info_thread_slots )", + "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / ( tma_retiring * tma_info_thread_slots )", "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;Uops;tma_issue2P", "MetricName": "tma_fp_vector_256b", "ScaleUnit": "100%", @@ -1126,7 +1126,7 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)", - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", + "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil;Metric", "MetricName": "tma_info_core_ilp" }, @@ -1247,13 +1247,13 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired", - "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_retire" }, { "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "MicroSeq;Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_strings_cycles", "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1" @@ -1286,7 +1286,7 @@ }, { "BriefDescription": "Average number of Uops issued by front-end when it issued something", - "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=0x1@", + "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=0x1@", "MetricGroup": "Fed;FetchBW;Metric", "MetricName": "tma_info_frontend_fetch_upc" }, @@ -1306,14 +1306,14 @@ }, { "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection", - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "Fed;Metric", "MetricName": "tma_info_frontend_unknown_branch_cost", "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node." }, { "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "DSBmiss;Metric", "MetricName": "tma_info_frontend_dsb_switch_cost" }, @@ -1646,7 +1646,7 @@ }, { "BriefDescription": "Average Parallel L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", "MetricGroup": "Memory_BW;Offcore;Metric", "MetricName": "tma_info_memory_latency_load_l2_mlp" }, diff --git a/LNL/metrics/perf/lunarlake_metrics_lioncove_core_perf.json b/LNL/metrics/perf/lunarlake_metrics_lioncove_core_perf.json index fad64026..9f96365a 100644 --- a/LNL/metrics/perf/lunarlake_metrics_lioncove_core_perf.json +++ b/LNL/metrics/perf/lunarlake_metrics_lioncove_core_perf.json @@ -20,7 +20,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)", - "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", + "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", "MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots", "MetricName": "tma_bottleneck_instruction_fetch_bw", "MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20", @@ -79,7 +79,7 @@ }, { "BriefDescription": "Total pipeline cost of irregular execution (e.g", - "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_microcode_sequencer + max( 0 , tma_heavy_operations - tma_microcode_sequencer ) ) ) * ( ( ( 99 *3 + 63 + 30 ) / 5 ) * ASSISTS.ANY / tma_info_thread_slots / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", + "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_microcode_sequencer + max( 0 , tma_heavy_operations - tma_microcode_sequencer ) ) ) * ( ( ( 99 *3 + 63 + 30 ) / 5 ) * ASSISTS.ANY / tma_info_thread_slots / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", "MetricGroup": "Bad;BvIO;Cor;Ret;TopdownL1;tma_L1_group;Default;Scaled_Slots;tma_issueMS", "MetricName": "tma_bottleneck_irregular_overhead", "MetricThreshold": "tma_bottleneck_irregular_overhead > 10", @@ -280,7 +280,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)", - "MetricExpr": "( cpu@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks + IDQ.MITE_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", + "MetricExpr": "( cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks + IDQ.MITE_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_mite", "ScaleUnit": "100%", @@ -289,7 +289,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline", - "MetricExpr": "( cpu@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", + "MetricExpr": "( cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS ) * ( IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE - IDQ_BUBBLES.FETCH_LATENCY ) ) / tma_info_thread_clks", "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_dsb", "ScaleUnit": "100%", @@ -298,7 +298,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit", - "MetricExpr": "cpu@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks", + "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks", "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_lsd", "ScaleUnit": "100%", @@ -617,7 +617,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)", - "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", + "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", "MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;Clocks;tma_issueBW", "MetricName": "tma_mem_bandwidth", "ScaleUnit": "100%", @@ -954,7 +954,7 @@ }, { "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors", - "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / ( tma_retiring * tma_info_thread_slots )", + "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / ( tma_retiring * tma_info_thread_slots )", "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;Uops;tma_issue2P", "MetricName": "tma_fp_vector_256b", "ScaleUnit": "100%", @@ -1175,7 +1175,7 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)", - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", + "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil;Metric", "MetricName": "tma_info_core_ilp" }, @@ -1296,13 +1296,13 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired", - "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_retire" }, { "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "MicroSeq;Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_strings_cycles", "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1" @@ -1335,7 +1335,7 @@ }, { "BriefDescription": "Average number of Uops issued by front-end when it issued something", - "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=0x1@", + "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=0x1@", "MetricGroup": "Fed;FetchBW;Metric", "MetricName": "tma_info_frontend_fetch_upc" }, @@ -1355,14 +1355,14 @@ }, { "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection", - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "Fed;Metric", "MetricName": "tma_info_frontend_unknown_branch_cost", "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node." }, { "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "DSBmiss;Metric", "MetricName": "tma_info_frontend_dsb_switch_cost" }, @@ -1701,7 +1701,7 @@ }, { "BriefDescription": "Average Parallel L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", "MetricGroup": "Memory_BW;Offcore;Metric", "MetricName": "tma_info_memory_latency_load_l2_mlp" }, diff --git a/MTL/metrics/perf/meteorlake_metrics_redwoodcove_core_perf.json b/MTL/metrics/perf/meteorlake_metrics_redwoodcove_core_perf.json index ebceaa21..99eee3dc 100644 --- a/MTL/metrics/perf/meteorlake_metrics_redwoodcove_core_perf.json +++ b/MTL/metrics/perf/meteorlake_metrics_redwoodcove_core_perf.json @@ -20,7 +20,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)", - "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", + "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", "MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots", "MetricName": "tma_bottleneck_instruction_fetch_bw", "MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20", @@ -79,7 +79,7 @@ }, { "BriefDescription": "Total pipeline cost of irregular execution (e.g", - "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", + "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", "MetricGroup": "Bad;BvIO;Cor;Ret;TopdownL1;tma_L1_group;Default;Scaled_Slots;tma_issueMS", "MetricName": "tma_bottleneck_irregular_overhead", "MetricThreshold": "tma_bottleneck_irregular_overhead > 10", @@ -242,7 +242,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)", - "MetricExpr": "( 3 ) * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks", + "MetricExpr": "( 3 ) * cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks", "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;Clocks_Estimated;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO", "MetricName": "tma_ms_switches", "ScaleUnit": "100%", @@ -289,7 +289,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder", - "MetricExpr": "( cpu@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2", + "MetricExpr": "( cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2", "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group;Slots_Estimated;tma_issueD0", "MetricName": "tma_decoder0_alone", "ScaleUnit": "100%", @@ -316,7 +316,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details", - "MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2", + "MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2", "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", "MetricName": "tma_ms", "ScaleUnit": "100%", @@ -610,7 +610,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)", - "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", + "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", "MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;Clocks;tma_issueBW", "MetricName": "tma_mem_bandwidth", "ScaleUnit": "100%", @@ -1214,7 +1214,7 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)", - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", + "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil;Metric", "MetricName": "tma_info_core_ilp" }, @@ -1341,13 +1341,13 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired", - "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_retire" }, { "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", + "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", "MetricGroup": "MicroSeq;Pipeline;Ret;Metric", "MetricName": "tma_info_pipeline_strings_cycles", "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1" @@ -1362,7 +1362,7 @@ }, { "BriefDescription": "", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )", "MetricGroup": "Cor;Pipeline;PortsUtil;SMT;Metric", "MetricName": "tma_info_pipeline_execute" }, @@ -1386,7 +1386,7 @@ }, { "BriefDescription": "Average number of Uops issued by front-end when it issued something", - "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=0x1@", + "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=0x1@", "MetricGroup": "Fed;FetchBW;Metric", "MetricName": "tma_info_frontend_fetch_upc" }, @@ -1406,14 +1406,14 @@ }, { "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection", - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "Fed;Metric", "MetricName": "tma_info_frontend_unknown_branch_cost", "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node." }, { "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", "MetricGroup": "DSBmiss;Metric", "MetricName": "tma_info_frontend_dsb_switch_cost" }, @@ -1757,7 +1757,7 @@ }, { "BriefDescription": "Average Parallel L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", "MetricGroup": "Memory_BW;Offcore;Metric", "MetricName": "tma_info_memory_latency_load_l2_mlp" },