Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#4487: inline instr count for trace_after_instrs in drcachesim, AArch64 #4677

Merged
merged 12 commits into from
Feb 1, 2021
Merged
2 changes: 2 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ Further non-compatibility-affecting changes include:
- Added instr_is_exclusive_load().
- Added XINST_CREATE_store_pair() and XINST_CREATE_load_pair().
- Added instr_replace_reg_resize() and opnd_replace_reg_resize().
- Added DRX_COUNTER_REL_ACQ flag to optionally enable release-acquire semantics for
sapostolakis marked this conversation as resolved.
Show resolved Hide resolved
drx_insert_counter_update() on AArchXX.

**************************************************
<hr>
Expand Down
73 changes: 67 additions & 6 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1320,10 +1320,10 @@ static bool tracing_enabled;
static volatile bool tracing_scheduled;
static void *schedule_tracing_lock;

#ifdef X86_64
#if defined(X86_64) || defined(AARCH64)
# define DELAYED_CHECK_INLINED 1
#else
// XXX i#4487: we don't have the inlining implemented yet.
/* XXX we don't have the inlining implemented yet for 32-bit architectures. */
#endif

static dr_emit_flags_t
Expand Down Expand Up @@ -1448,12 +1448,16 @@ event_delay_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t
num_instrs = (uint)(ptr_uint_t)user_data;
drmgr_disable_auto_predication(drcontext, bb);
#ifdef DELAYED_CHECK_INLINED
# ifdef X86_64
# if defined(X86_64) || defined(AARCH64)
instr_t *skip_call = INSTR_CREATE_label(drcontext);
# ifdef X86_64
if (!drx_insert_counter_update(drcontext, bb, instr,
(dr_spill_slot_t)(SPILL_SLOT_MAX + 1) /*use drmgr*/,
&instr_count, num_instrs, DRX_COUNTER_64BIT))
DR_ASSERT(false);
instr_t *skip_call = INSTR_CREATE_label(drcontext);

if (drreg_reserve_aflags(drcontext, bb, instr) != DRREG_SUCCESS)
FATAL("Fatal error: failed to reserve aflags");
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
reg_id_t scratch = DR_REG_NULL;
if (op_trace_after_instrs.get_value() < INT_MAX) {
MINSERT(bb, instr,
Expand All @@ -1469,20 +1473,77 @@ event_delay_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t
opnd_create_reg(scratch)));
}
MINSERT(bb, instr, INSTR_CREATE_jcc(drcontext, OP_jl, opnd_create_instr(skip_call)));
# elif defined(AARCH64)
if (!drx_insert_counter_update(drcontext, bb, instr,
(dr_spill_slot_t)(SPILL_SLOT_MAX + 1) /*use drmgr*/,
(dr_spill_slot_t)(SPILL_SLOT_MAX + 1), &instr_count,
num_instrs, DRX_COUNTER_64BIT | DRX_COUNTER_REL_ACQ))
DR_ASSERT(false);

reg_id_t scratch1, scratch2;
if (drreg_reserve_register(drcontext, bb, instr, NULL, &scratch1) != DRREG_SUCCESS ||
drreg_reserve_register(drcontext, bb, instr, NULL, &scratch2) != DRREG_SUCCESS ||
drreg_reserve_aflags(drcontext, bb, instr) != DRREG_SUCCESS)
FATAL("Fatal error: failed to reserve scratch registers and aflags");

instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)&instr_count,
opnd_create_reg(scratch1), bb, instr, NULL, NULL);
MINSERT(bb, instr,
XINST_CREATE_load(drcontext, opnd_create_reg(scratch2),
OPND_CREATE_MEMPTR(scratch1, 0)));
instrlist_insert_mov_immed_ptrsz(drcontext, op_trace_after_instrs.get_value(),
opnd_create_reg(scratch1), bb, instr, NULL, NULL);
MINSERT(bb, instr,
XINST_CREATE_cmp(drcontext, opnd_create_reg(scratch2),
opnd_create_reg(scratch1)));
MINSERT(bb, instr,
XINST_CREATE_jump_cond(drcontext, DR_PRED_LT, opnd_create_instr(skip_call)));
# endif

derekbruening marked this conversation as resolved.
Show resolved Hide resolved
/* hit_instr_count_threshold does not always return. Restore scratch registers and
* aflags.
*/
# ifdef X86_64
drreg_statelessly_restore_app_value(drcontext, bb, DR_REG_NULL, instr, instr, NULL,
NULL);
if (scratch != DR_REG_NULL) {
drreg_statelessly_restore_app_value(drcontext, bb, scratch, instr, instr, NULL,
NULL);
}
# elif defined(AARCH64)
drreg_statelessly_restore_app_value(drcontext, bb, scratch1, instr, instr, NULL,
NULL);
drreg_statelessly_restore_app_value(drcontext, bb, scratch2, instr, instr, NULL,
NULL);
drreg_statelessly_restore_app_value(drcontext, bb, DR_REG_NULL, instr, instr, NULL,
NULL);
# endif
dr_insert_clean_call(drcontext, bb, instr, (void *)hit_instr_count_threshold,
false /*fpstate */, 1,
OPND_CREATE_INTPTR((ptr_uint_t)instr_get_app_pc(instr)));
MINSERT(bb, instr, skip_call);

# ifdef X86_64
if (drreg_unreserve_aflags(drcontext, bb, instr) != DRREG_SUCCESS)
DR_ASSERT(false);
if (scratch != DR_REG_NULL) {
if (drreg_unreserve_register(drcontext, bb, instr, scratch) != DRREG_SUCCESS)
DR_ASSERT(false);
}
# elif defined(AARCH64)
if (drreg_unreserve_register(drcontext, bb, instr, scratch1) != DRREG_SUCCESS ||
drreg_unreserve_register(drcontext, bb, instr, scratch2) != DRREG_SUCCESS ||
drreg_unreserve_aflags(drcontext, bb, instr) != DRREG_SUCCESS)
DR_ASSERT(false);
# endif
# else
# error NYI
# endif
#else
// XXX: drx_insert_counter_update doesn't support 64-bit, and there's no
// XINST_CREATE_load_8bytes. For now we pay the cost of a clean call every time.
/* XXX: drx_insert_counter_update doesn't support 64-bit counters for ARM_32, and
* inlining of check_instr_count_threshold is not implemented for i386. For now we pay
* the cost of a clean call every time for 32-bit architectures.
*/
dr_insert_clean_call(drcontext, bb, instr, (void *)check_instr_count_threshold,
false /*fpstate */, 2, OPND_CREATE_INT32(num_instrs),
OPND_CREATE_INTPTR((ptr_uint_t)instr_get_app_pc(instr)));
Expand Down
4 changes: 4 additions & 0 deletions core/ir/aarch64/instr_create.h
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,10 @@ enum {
#define INSTR_CREATE_stur(dc, mem, rt) instr_create_1dst_1src(dc, OP_stur, mem, rt)
#define INSTR_CREATE_sturh(dc, mem, rt) instr_create_1dst_1src(dc, OP_sturh, mem, rt)
/* Creates an OP_stlr (AArch64 store-release register) instruction with one
 * destination and three sources via instr_create_1dst_3src().
 *   dc:  context argument forwarded as the first argument of the creator.
 *   mem: the destination memory operand.
 *   rt:  the first source operand (the value being stored).
 * The two trailing OPND_CREATE_INT(0x1f) sources are placeholder non-operands.
 * NOTE(review): neighboring macros use the INSTR_CREATE_ prefix while this one is
 * spelled INST_CREATE_ -- confirm whether that is intended; renaming it would
 * require updating every caller.
 */
/* TODO i#4532: Remove these superfluous 0x1f non-operands. */
#define INST_CREATE_stlr(dc, mem, rt) \
instr_create_1dst_3src(dc, OP_stlr, mem, rt, OPND_CREATE_INT(0x1f), \
OPND_CREATE_INT(0x1f))
/* TODO i#4532: Remove these superfluous 0x1f non-operands. */
#define INSTR_CREATE_stxr(dc, mem, rs, rt) \
instr_create_2dst_2src(dc, OP_stxr, mem, rs, rt, OPND_CREATE_INT(0x1f))
/* TODO i#4532: Remove these superfluous 0x1f non-operands. */
Expand Down
49 changes: 41 additions & 8 deletions ext/drx/drx.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,10 @@ drx_insert_counter_update(void *drcontext, instrlist_t *ilist, instr_t *where,
}
}
#elif defined(AARCHXX)
# ifdef ARM
/* FIXME i#1551: implement 64-bit counter support */
ASSERT(!is_64, "DRX_COUNTER_64BIT is not implemented");
ASSERT(!is_64, "DRX_COUNTER_64BIT is not implemented for ARM_32");
# endif /* ARM */

if (use_drreg) {
if (drreg_reserve_register(drcontext, ilist, where, NULL, &reg1) !=
Expand All @@ -556,16 +558,47 @@ drx_insert_counter_update(void *drcontext, instrlist_t *ilist, instr_t *where,
* address being near this one, and add to reg1 instead of
* taking 2 instrs to load it fresh.
*/
/* Update the counter either with release-acquire semantics (when the
* DRX_COUNTER_REL_ACQ flag is on) or without any barriers.
*/
instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)addr, opnd_create_reg(reg1),
ilist, where, NULL, NULL);
MINSERT(
ilist, where,
XINST_CREATE_load(drcontext, opnd_create_reg(reg2), OPND_CREATE_MEMPTR(reg1, 0)));
MINSERT(ilist, where,
if (TEST(DRX_COUNTER_REL_ACQ, flags)) {
# ifdef AARCH64
MINSERT(ilist, where,
INSTR_CREATE_ldar(drcontext, opnd_create_reg(reg2),
OPND_CREATE_MEMPTR(reg1, 0)));
MINSERT(
ilist, where,
XINST_CREATE_add(drcontext, opnd_create_reg(reg2), OPND_CREATE_INT(value)));
MINSERT(ilist, where,
XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg1, 0),
opnd_create_reg(reg2)));
MINSERT(ilist, where,
INST_CREATE_stlr(drcontext, OPND_CREATE_MEMPTR(reg1, 0),
opnd_create_reg(reg2)));
# else /* ARM */
/* TODO: This counter update has not been tested on an ARM_32 machine. */
MINSERT(ilist, where,
XINST_CREATE_load(drcontext, opnd_create_reg(reg2),
OPND_CREATE_MEMPTR(reg1, 0)));
MINSERT(ilist, where, INSTR_CREATE_dmb(drcontext, OPND_CREATE_INT(DR_DMB_ISH)));
MINSERT(
ilist, where,
XINST_CREATE_add(drcontext, opnd_create_reg(reg2), OPND_CREATE_INT(value)));
MINSERT(ilist, where, INSTR_CREATE_dmb(drcontext, OPND_CREATE_INT(DR_DMB_ISH)));
MINSERT(ilist, where,
XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg1, 0),
opnd_create_reg(reg2)));
# endif
} else {
MINSERT(ilist, where,
XINST_CREATE_load(drcontext, opnd_create_reg(reg2),
OPND_CREATE_MEMPTR(reg1, 0)));
MINSERT(
ilist, where,
XINST_CREATE_add(drcontext, opnd_create_reg(reg2), OPND_CREATE_INT(value)));
MINSERT(ilist, where,
XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg1, 0),
opnd_create_reg(reg2)));
}
if (use_drreg) {
if (drreg_unreserve_register(drcontext, ilist, where, reg1) != DRREG_SUCCESS ||
drreg_unreserve_register(drcontext, ilist, where, reg2) != DRREG_SUCCESS)
Expand Down
11 changes: 7 additions & 4 deletions ext/drx/drx.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,9 @@ drx_aflags_are_dead(instr_t *where);

/** Flags for \p drx_insert_counter_update */
enum {
DRX_COUNTER_64BIT = 0x01, /**< 64-bit counter is used for update. */
DRX_COUNTER_LOCK = 0x10, /**< Counter update is atomic. */
DRX_COUNTER_64BIT = 0x01, /**< 64-bit counter is used for update. */
DRX_COUNTER_REL_ACQ = 0x02, /**< Release-acquire semantics for counter update. */
DRX_COUNTER_LOCK = 0x10, /**< Counter update is atomic. */
};

DR_EXPORT
Expand All @@ -159,8 +160,10 @@ DR_EXPORT
* \note The counter update is racy (i.e., not synchronized among threads)
* unless #DRX_COUNTER_LOCK is specified in \p flags. When #DRX_COUNTER_LOCK
* is set, the instrumentation may fail if a 64-bit counter is updated in
* a 32-bit application or the counter crosses cache lines. Currently,
* #DRX_COUNTER_LOCK is not yet supported on ARM.
* a 32-bit application or the counter crosses cache lines. #DRX_COUNTER_LOCK is not
* yet supported on AArchXX. On AArchXX, if #DRX_COUNTER_REL_ACQ is specified in
* \p flags, release-acquire semantics are enforced for the counter update. The
* #DRX_COUNTER_REL_ACQ flag can be used in conjunction with #DRX_COUNTER_64BIT.
*
* \note To update multiple counters at the same place, multiple
* drx_insert_counter_update() invocations should be made in a row with the
Expand Down
24 changes: 24 additions & 0 deletions suite/tests/client-interface/drx-test.dll.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ static uint counterB;
static uint counterC;
static uint counterD;
#endif
#if defined(AARCH64)
static uint64 counterE;
static uint64 counterF;
#endif
#if defined(AARCHXX)
static uint counterG;
#endif

static void
event_exit(void)
Expand All @@ -61,6 +68,13 @@ event_exit(void)
CHECK(counterB == 2 * counterA, "counter inc messed up");
#if defined(ARM)
CHECK(counterD == 2 * counterA, "counter inc messed up");
#endif
#if defined(AARCH64)
CHECK(counterE == 2 * counterA, "64-bit counter inc messed up");
CHECK(counterF == 2 * counterA, "64-bit counter inc with acq_rel messed up");
#endif
#if defined(AARCHXX)
CHECK(counterG == 2 * counterA, "32-bit counter inc with acq_rel messed up");
#endif
dr_fprintf(STDERR, "event_exit\n");
}
Expand Down Expand Up @@ -114,6 +128,16 @@ event_basic_block(void *drcontext, void *tag, instrlist_t *bb, bool for_trace,
drx_insert_counter_update(drcontext, bb, first, SPILL_SLOT_1,
IF_NOT_X86_(SPILL_SLOT_2) & counterD, 2,
IF_X86_ELSE(DRX_COUNTER_LOCK, 0));
#endif
#if defined(AARCH64)
drx_insert_counter_update(drcontext, bb, first, SPILL_SLOT_1, SPILL_SLOT_2, &counterE,
2, DRX_COUNTER_64BIT);
drx_insert_counter_update(drcontext, bb, first, SPILL_SLOT_1, SPILL_SLOT_2, &counterF,
2, DRX_COUNTER_64BIT | DRX_COUNTER_REL_ACQ);
#endif
#if defined(AARCHXX)
drx_insert_counter_update(drcontext, bb, first, SPILL_SLOT_1, SPILL_SLOT_2, &counterG,
2, DRX_COUNTER_REL_ACQ);
#endif
/* Exercise drx's basic block termination with a zero-cost label */
drx_tail_pad_block(drcontext, bb);
Expand Down
24 changes: 24 additions & 0 deletions suite/tests/client-interface/drxmgr-test.dll.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@

static uint counterA;
static uint counterB;
#if defined(AARCH64)
static uint64 counterC;
static uint64 counterD;
#endif
#if defined(AARCHXX)
static uint counterE;
#endif

static void
event_exit(void)
Expand All @@ -55,6 +62,13 @@ event_exit(void)
drreg_exit();
drmgr_exit();
CHECK(counterB == 3 * counterA, "counter inc messed up");
#if defined(AARCH64)
CHECK(counterC == 3 * counterA, "64-bit counter inc messed up");
CHECK(counterD == 3 * counterA, "64-bit counter inc with acq_rel messed up");
#endif
#if defined(AARCHXX)
CHECK(counterE == 3 * counterA, "32-bit counter inc with acq_rel messed up");
#endif
dr_fprintf(STDERR, "event_exit\n");
}

Expand All @@ -69,6 +83,16 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
IF_NOT_X86_(SPILL_SLOT_MAX + 1) & counterA, 1, 0);
drx_insert_counter_update(drcontext, bb, inst, SPILL_SLOT_MAX + 1,
IF_NOT_X86_(SPILL_SLOT_MAX + 1) & counterB, 3, 0);
#if defined(AARCH64)
drx_insert_counter_update(drcontext, bb, inst, SPILL_SLOT_MAX + 1, SPILL_SLOT_MAX + 1,
&counterC, 3, DRX_COUNTER_64BIT);
sapostolakis marked this conversation as resolved.
Show resolved Hide resolved
drx_insert_counter_update(drcontext, bb, inst, SPILL_SLOT_MAX + 1, SPILL_SLOT_MAX + 1,
&counterD, 3, DRX_COUNTER_64BIT | DRX_COUNTER_REL_ACQ);
#endif
#if defined(AARCHXX)
drx_insert_counter_update(drcontext, bb, inst, SPILL_SLOT_MAX + 1, SPILL_SLOT_MAX + 1,
&counterE, 3, DRX_COUNTER_REL_ACQ);
#endif
return DR_EMIT_DEFAULT;
}

Expand Down