Skip to content

Commit

Permalink
Added patch addressing issue icl-utk-edu#207 "Negative/large values in topdown perf counters".
Browse files Browse the repository at this point in the history
  • Loading branch information
willowec committed Sep 16, 2024
1 parent 3759141 commit 5a972a9
Show file tree
Hide file tree
Showing 6 changed files with 578 additions and 20 deletions.
9 changes: 8 additions & 1 deletion src/components/perf_event/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -1467,7 +1467,7 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
pe_ctl->events[i].event_fd);
ret=ioctl( pe_ctl->events[i].event_fd,
PERF_EVENT_IOC_ENABLE, NULL) ;
if (_perf_event_vector.cmp_info.fast_counter_read) {
if (_perf_event_vector.cmp_info.fast_counter_read && !pe_ctl->events[i].metric) {
pe_ctl->reset_counts[i] = 0LL;
pe_ctl->reset_flag = 0;
}
Expand Down Expand Up @@ -1622,6 +1622,13 @@ _pe_update_control_state( hwd_control_state_t *ctl,
// pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
// }

/* Intel's topdown events need to be handled differently from normal events. */
/* They are instantaneous values and should not be accumulated. The 'metric' */
/* flag marks such events, so that any other event types found to behave */
/* this way can reuse the same handling. */
if (strcmp(ntv_evt->base_name, "TOPDOWN") == 0) {
pe_ctl->events[i].metric = 1;
}

// set the cpu number provided with an event mask if there was one (will be -1 if mask not provided)
pe_ctl->events[i].cpu = ntv_evt->cpu;
Expand Down
23 changes: 12 additions & 11 deletions src/components/perf_event/perf_event_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@

typedef struct
{
int group_leader_fd; /* fd of group leader */
int event_fd; /* fd of event */
int event_opened; /* event successfully opened */
int profiling; /* event is profiling */
int sampling; /* event is a sampling event */
uint32_t nr_mmap_pages; /* number pages in the mmap buffer */
void *mmap_buf; /* used for control/profiling */
uint64_t tail; /* current read location in mmap buffer */
uint64_t mask; /* mask used for wrapping the pages */
int cpu; /* cpu associated with this event */
struct perf_event_attr attr; /* perf_event config structure */
int group_leader_fd; /* fd of group leader */
int event_fd; /* fd of event */
int event_opened; /* event successfully opened */
int profiling; /* event is profiling */
int sampling; /* event is a sampling event */
int metric; /* event is a metric event (e.g. topdown) */
uint32_t nr_mmap_pages; /* number pages in the mmap buffer */
void *mmap_buf; /* used for control/profiling */
uint64_t tail; /* current read location in mmap buffer */
uint64_t mask; /* mask used for wrapping the pages */
int cpu; /* cpu associated with this event */
struct perf_event_attr attr; /* perf_event config structure */
} pe_event_info_t;


Expand Down
19 changes: 17 additions & 2 deletions src/libpfm4/lib/events/intel_adl_glc_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -1552,12 +1552,12 @@ static const intel_x86_umask_t adl_glc_topdown[]={
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "BR_MISPREDICT_SLOTS",
.udesc = "TMA slots wasted due to incorrect speculation by branch mispredictions",
.udesc = "TMA slots wasted due to incorrect speculation by branch mispredictions (Topdown L2)",
.ucode = 0x8500ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "MEMORY_BOUND_SLOTS",
.udesc = "TMA slots wasted due to memory accesses (TopdownL2)",
.udesc = "TMA slots wasted due to memory accesses (Topdown L2)",
.ucode = 0x8700ull,
.uflags = INTEL_X86_NCOMBO,
},
Expand All @@ -1566,6 +1566,21 @@ static const intel_x86_umask_t adl_glc_topdown[]={
.ucode = 0x8000ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "FRONTEND_BOUND_SLOTS",
.udesc = "TMA slots where the front-end did not deliver uops (Topdown L1)",
.ucode = 0x8200ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "HEAVY_OPS_SLOTS",
.udesc = "TMA slots where heavy-weight instructions are retiring (Topdown L2)",
.ucode = 0x8400ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "FETCH_LAT_SLOTS",
.udesc = "TMA slots wasted due to front-end latency issues (Topdown L2)",
.ucode = 0x8600ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "SLOTS",
.udesc = "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
.ucode = 0x0400ull,
Expand Down
34 changes: 28 additions & 6 deletions src/papi_events.csv
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
# Intel Ice Lake SP events
CPU,icx
CPU,icl
# Note: Many Ice Lake events also work for Alder Lake/Raptor Lake P-cores
CPU,adl_glc
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
Expand All @@ -991,18 +993,15 @@ PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD
# L2 cache
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
# L3 cache
Expand All @@ -1014,8 +1013,6 @@ PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS
PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
# SMP
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
# Branches
PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
Expand All @@ -1024,6 +1021,15 @@ PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES

CPU,icx
CPU,icl
# L2
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
# SMP
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
#FLOPs
# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
Expand All @@ -1034,7 +1040,23 @@ PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARI
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
# End of icx, icl list

CPU,adl_glc
# L2
#PRESET,PAPI_L2_DCA,DERIVED_SUB,L2_RQSTS.REFERENCES,L2_RQSTS.ALL_CODE_RD
PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS.REFERENCES
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS.ALL_DEMAND_MISS
# SMP
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:DATA_RD
#FLOPs
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE
PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|4|*|+|N4|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE
PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE
PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
# End of icx, icl, adl_glc list

# Intel Sapphire Rapids events
CPU,spr
Expand Down
3 changes: 3 additions & 0 deletions src/validation_tests/Makefile.recipies
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ALL = fp_validation_hl \
cycles_validation flops_validation \
topdown_validation \
papi_br_cn papi_br_ins papi_br_msp \
papi_br_ntk papi_br_prc papi_br_tkn papi_br_ucn \
papi_dp_ops papi_fp_ops papi_sp_ops papi_hw_int \
Expand Down Expand Up @@ -47,6 +48,8 @@ cycles_validation: cycles_validation.o $(TESTLIB) $(PAPILIB) display_error.o ins
flops_validation: flops_validation.o $(TESTLIB) $(PAPILIB) display_error.o branches_testcode.o flops_testcode.o
$(CC) -o flops_validation flops_validation.o $(TESTLIB) display_error.o branches_testcode.o flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)

topdown_validation: topdown_validation.o $(TESTLIB) $(PAPILIB) instructions_testcode.o
$(CC) -o topdown_validation topdown_validation.o $(TESTLIB) instructions_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)

memleak_check: memleak_check.o $(TESTLIB) $(PAPILIB) display_error.o branches_testcode.o
$(CC) -o memleak_check memleak_check.o $(TESTLIB) display_error.o branches_testcode.o $(PAPILIB) $(LDFLAGS) $(LDFLAGS) $(EXTRALIB)
Expand Down
Loading

0 comments on commit 5a972a9

Please sign in to comment.