Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#2297: AARCH64: Implement cbr instrumentation #7005

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions api/samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,14 @@ if (X86) # FIXME i#1551, i#1569, i#3544: port to ARM/AArch64/RISCV64
# dr_insert_mbr_instrumentation is NYI
add_sample_client(modxfer "modxfer.c;utils.c" "drmgr;drreg;drx")
add_sample_client(modxfer_app2lib "modxfer_app2lib.c" "drmgr")
add_sample_client(hot_bbcount "hot_bbcount.c" "drmgr;drreg;drbbdup;drx")
endif (X86)
if (X86 OR AARCH64)
# dr_insert_mbr_instrumentation is NYI
add_sample_client(instrcalls "instrcalls.c;utils.c" "drmgr;drsyms;drx")
# dr_insert_cbr_instrument_ex is NYI
add_sample_client(cbrtrace "cbrtrace.c;utils.c" "drmgr;drx")
add_sample_client(hot_bbcount "hot_bbcount.c" "drmgr;drreg;drbbdup;drx")
endif (X86)
endif (X86 OR AARCH64)
add_sample_client(wrap "wrap.c" "drmgr;drwrap")
add_sample_client(signal "signal.c" "drmgr")
add_sample_client(syscall "syscall.c" "drmgr")
Expand Down
53 changes: 44 additions & 9 deletions core/ir/aarch64/instr_create_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,12 +463,12 @@
* they just need to know whether they need to preserve the app's flags, so maybe
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please enable the samples that use this which are currently disabled:

  # dr_insert_cbr_instrument_ex is NYI
  add_sample_client(cbrtrace    "cbrtrace.c;utils.c"    "drmgr;drx")
  add_sample_client(hot_bbcount "hot_bbcount.c"         "drmgr;drreg;drbbdup;drx")

Those are run as tests, though w/o targeted correctness checks: just making sure they don't crash.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please enable the count-ctis tests which use this, which will add regression tests.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since count-ctis test requires mbr instrumentation as well, I have added initial implementation for mbr instrumentation. But it sometimes return a very small number, possibly some index into the indirect branch cache? How to convert it back to actual address?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, didn't realize it needed more: was just grepping for tests that use cbr. Makes sense to separate out the mbr. Is it easy to separate in the test? Or just separately locally in the test and confirm cbr works and state that in the PR description and say that the test will be enabled soon when mbr is added and then enable the test in a separate PR for mbr, so long as that comes in relatively soon (i.e., not months later with no cbr test in the meantime).

mbr is supposed to obtain a real address so that sounds like something is wrong there.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the PR description to describe how you tested this, since there are no tests added/enabled by this PR (see prior comment which will add some).

* we can just document that this may not write them.
*/
#define XINST_CREATE_slr_s(dc, d, rm_or_imm) \
(opnd_is_reg(rm_or_imm) \
? instr_create_1dst_2src(dc, OP_lsrv, d, d, rm_or_imm) \
: instr_create_1dst_3src(dc, OP_ubfm, d, d, rm_or_imm, \
reg_is_32bit(opnd_get_reg(d)) ? OPND_CREATE_INT(31) \
: OPND_CREATE_INT(63)))
#define XINST_CREATE_slr_s(dc, d, rm_or_imm) \
(opnd_is_reg(rm_or_imm) \
? instr_create_1dst_2src(dc, OP_lsrv, d, d, rm_or_imm) \
: INSTR_CREATE_ubfm(dc, d, d, rm_or_imm, \
reg_is_32bit(opnd_get_reg(d)) ? OPND_CREATE_INT(31) \
: OPND_CREATE_INT(63)))

/**
* This platform-independent macro creates an instr_t for a nop instruction.
Expand Down Expand Up @@ -658,14 +658,49 @@
instr_create_0dst_3src((dc), OP_tbnz, (pc), (reg), (imm))
#define INSTR_CREATE_cmp(dc, rn, rm_or_imm) \
INSTR_CREATE_subs(dc, OPND_CREATE_ZR(rn), rn, rm_or_imm)
#define INSTR_CREATE_eor(dc, d, s) \
INSTR_CREATE_eor_shift(dc, d, d, s, OPND_CREATE_INT8(DR_SHIFT_LSL), \
OPND_CREATE_INT8(0))

/**
* Creates an EOR instruction with one output and two inputs. For simplicity, the first
* input reuses the output register.
*
* \param dc The void * dcontext used to allocate memory for the instr_t.
* \param d The output register and the first input register.
* \param s_or_imm The second input register or immediate.
*/
#define INSTR_CREATE_eor(dc, d, s_or_imm) \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for contributing.

Ideally, every new addition of an INSTR_CREATE_ or XINSTR_CREATE_ should include the Doxygen comment block describing the macro, e.g.

/**                                                                       
 * Creates an EOR instruction with one output and two inputs.             
 * \param dc   The void * dcontext used to allocate memory for the instr_t.
 * \param rd   The output register.                                       
 * \param rn   The first input register.                                  
 * \param rm_or_imm   The second input register or immediate.             
 */

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added

opnd_is_immed(s_or_imm) \
? instr_create_1dst_2src(dc, OP_eor, d, d, s_or_imm) \
: INSTR_CREATE_eor_shift(dc, d, d, s_or_imm, OPND_CREATE_INT8(DR_SHIFT_LSL), \
OPND_CREATE_INT8(0))
#define INSTR_CREATE_eor_shift(dc, rd, rn, rm, sht, sha) \
instr_create_1dst_4src(dc, OP_eor, rd, rn, \
opnd_create_reg_ex(opnd_get_reg(rm), 0, DR_OPND_SHIFTED), \
opnd_add_flags(sht, DR_OPND_IS_SHIFT), sha)

/**
* Creates a CSINC instruction with one output and three inputs.
*
* \param dc The void * dcontext used to allocate memory for the instr_t.
* \param rd The output register.
* \param rn The first input register.
* \param rm The second input register.
* \param cond The third input condition code.
*/
#define INSTR_CREATE_csinc(dc, rd, rn, rm, cond) \
instr_create_1dst_3src(dc, OP_csinc, rd, rn, rm, cond)

/**
* Creates a UBFM instruction with one output and three inputs.
*
* \param dc The void * dcontext used to allocate memory for the instr_t.
* \param rd The output register.
* \param rn The first input register.
* \param immr The second input immediate.
* \param imms The third input immediate.
*/
#define INSTR_CREATE_ubfm(dc, rd, rn, immr, imms) \
instr_create_1dst_3src(dc, OP_ubfm, rd, rn, immr, imms)

#define INSTR_CREATE_ldp(dc, rt1, rt2, mem) \
instr_create_2dst_1src(dc, OP_ldp, rt1, rt2, mem)
#define INSTR_CREATE_ldr(dc, Rd, mem) instr_create_1dst_1src((dc), OP_ldr, (Rd), (mem))
Expand Down
178 changes: 177 additions & 1 deletion core/lib/instrument.c
Original file line number Diff line number Diff line change
Expand Up @@ -6090,6 +6090,49 @@ dr_insert_mbr_instrumentation(void *drcontext, instrlist_t *ilist, instr_t *inst
#elif defined(RISCV64)
/* FIXME i#3544: Not implemented */
ASSERT_NOT_IMPLEMENTED(false);
#elif defined(AARCH64)
dcontext_t *dcontext = (dcontext_t *)drcontext;
ptr_uint_t address;
reg_id_t target = DR_REG_NULL;
reg_id_t tmp = DR_REG_NULL;
CLIENT_ASSERT(drcontext != NULL,
"dr_insert_mbr_instrumentation: drcontext cannot be NULL");
address = (ptr_uint_t)instr_get_translation(instr);
CLIENT_ASSERT(address != 0,
"dr_insert_mbr_instrumentation: can't determine app address");
CLIENT_ASSERT(instr_is_mbr(instr),
"dr_insert_mbr_instrumentation must be applied to a mbr");

/* For all known mbr instructions on aarch64, the first source register saves the
* target. */
target = opnd_get_reg(instr_get_src(instr, 0));
/* Use tmp register to save target address. */
tmp = (reg_to_pointer_sized(target) == DR_REG_X0) ? DR_REG_X1 : DR_REG_X0;
/* Save old value of tmp register to scratch_slot. */
dr_save_reg(dcontext, ilist, instr, tmp, scratch_slot);
/* Copy target to tmp. */
instr_t *mov =
XINST_CREATE_move(dcontext, opnd_create_reg(tmp), opnd_create_reg(target));
/* PR 214962: ensure we'll properly translate the de-ref of app
* memory by marking the spill and de-ref as INSTR_OUR_MANGLING.
*/
instr_set_our_mangling(mov, true);
MINSERT(ilist, instr, mov);

dr_insert_clean_call_ex(
drcontext, ilist, instr, callee,
/* Many users will ask for mcontexts; some will set; it doesn't seem worth
* asking the user to pass in a flag: if they're using this they are not
* super concerned about overhead.
*/
DR_CLEANCALL_READS_APP_CONTEXT | DR_CLEANCALL_WRITES_APP_CONTEXT, 2,
/* Address of mbr is 1st param. */
OPND_CREATE_INTPTR(address),
/* Indirect target is 2nd param. */
opnd_create_reg(tmp));

/* Restore old value of tmp register. */
dr_restore_reg(dcontext, ilist, instr, tmp, scratch_slot);
#endif /* X86/ARM/RISCV64 */
}

Expand Down Expand Up @@ -6328,7 +6371,140 @@ dr_insert_cbr_instrumentation_help(void *drcontext, instrlist_t *ilist, instr_t
#elif defined(RISCV64)
/* FIXME i#3544: Not implemented */
ASSERT_NOT_IMPLEMENTED(false);
#endif /* X86/ARM/RISCV64 */
#elif defined(AARCH64)
dcontext_t *dcontext = (dcontext_t *)drcontext;
ptr_uint_t address, target;
reg_id_t dir = DR_REG_NULL;
reg_id_t flags = DR_REG_NULL;
int opc;
CLIENT_ASSERT(drcontext != NULL,
"dr_insert_cbr_instrumentation: drcontext cannot be NULL");
address = (ptr_uint_t)instr_get_translation(instr);
CLIENT_ASSERT(address != 0,
"dr_insert_cbr_instrumentation: can't determine app address");
CLIENT_ASSERT(instr_is_cbr(instr),
"dr_insert_cbr_instrumentation must be applied to a cbr");
target = (ptr_uint_t)opnd_get_pc(instr_get_target(instr));

/* Compute branch direction. */
opc = instr_get_opcode(instr);
if (opc == OP_cbnz || opc == OP_cbz) {
/* XXX: which is faster, additional conditional branch or cmp + csinc? */
opnd_t reg_op = instr_get_src(instr, 1);
reg_id_t reg = opnd_get_reg(reg_op);
/* Use dir register to compute direction. */
dir = (reg_to_pointer_sized(reg) == DR_REG_X0) ? DR_REG_X1 : DR_REG_X0;
/* Save old value of dir register to SPILL_SLOT_1. */
dr_save_reg(dcontext, ilist, instr, dir, SPILL_SLOT_1);
/* Use flags register to save nzcv. */
flags = (reg_to_pointer_sized(reg) == DR_REG_X2) ? DR_REG_X3 : DR_REG_X2;
/* Save old value of flags register to SPILL_SLOT_2. */
dr_save_reg(dcontext, ilist, instr, flags, SPILL_SLOT_2);
/* Save flags to flags register. */
dr_save_arith_flags_to_reg(dcontext, ilist, instr, flags);

/* Compare reg against zero. */
instr_t *cmp = INSTR_CREATE_cmp(dcontext, reg_op, OPND_CREATE_INT(0));
MINSERT(ilist, instr, cmp);
/* Compute branch direction. */
opnd_t dir_op = opnd_create_reg(dir);
instr_t *cset = INSTR_CREATE_csinc(
dcontext, dir_op, OPND_CREATE_ZR(dir_op), OPND_CREATE_ZR(dir_op),
opnd_create_cond(opc == OP_cbnz ? DR_PRED_EQ : DR_PRED_NE));
MINSERT(ilist, instr, cset);
} else if (opc == OP_tbnz || opc == OP_tbz) {
opnd_t reg_op = instr_get_src(instr, 1);
reg_id_t reg = opnd_get_reg(reg_op);
reg_id_t dir_same_width = DR_REG_NULL;

/* Use dir register to compute direction. */
dir = (reg_to_pointer_sized(reg) == DR_REG_X0) ? DR_REG_X1 : DR_REG_X0;
dir_same_width = reg_resize_to_opsz(dir, reg_get_size(reg));
/* Save old value of dir register to SPILL_SLOT_1. */
dr_save_reg(dcontext, ilist, instr, dir, SPILL_SLOT_1);

/* Extract tst_bit from reg. */
int tst_bit = opnd_get_immed_int(instr_get_src(instr, 2));
opnd_t dir_same_width_op = opnd_create_reg(dir_same_width);
instr_t *ubfm =
INSTR_CREATE_ubfm(dcontext, dir_same_width_op, reg_op,
OPND_CREATE_INT(tst_bit), OPND_CREATE_INT(tst_bit));
MINSERT(ilist, instr, ubfm);

/* Invert result if tbz. */
if (opc == OP_tbz) {
instr_t *eor =
INSTR_CREATE_eor(dcontext, dir_same_width_op, OPND_CREATE_INT(1));
MINSERT(ilist, instr, eor);
}
} else if (opc == OP_bcond) {
/* Use dir register to compute direction. */
dir = SCRATCH_REG0;
/* Save old value of dir register to SPILL_SLOT_1. */
dr_save_reg(dcontext, ilist, instr, dir, SPILL_SLOT_1);
/* Compute branch direction. */
dr_pred_type_t pred = instr_get_predicate(instr);
opnd_t dir_op = opnd_create_reg(dir);
instr_t *cset = INSTR_CREATE_csinc(
dcontext, dir_op, OPND_CREATE_ZR(dir_op), OPND_CREATE_ZR(dir_op),
opnd_create_cond(instr_invert_predicate(pred)));
MINSERT(ilist, instr, cset);
} else {
CLIENT_ASSERT(false, "unknown conditional branch type");
return;
}

if (has_fallthrough) {
ptr_uint_t fallthrough = address + instr_length(drcontext, instr);
CLIENT_ASSERT(fallthrough > address, "wrong fallthrough address");
dr_insert_clean_call_ex(
drcontext, ilist, instr, callee,
/* Many users will ask for mcontexts; some will set; it doesn't seem worth
* asking the user to pass in a flag: if they're using this they are not
* super concerned about overhead.
*/
DR_CLEANCALL_READS_APP_CONTEXT | DR_CLEANCALL_WRITES_APP_CONTEXT, 5,
/* Address of cbr is 1st parameter. */
OPND_CREATE_INTPTR(address),
/* Target is 2nd parameter. */
OPND_CREATE_INTPTR(target),
/* Fall-through is 3rd parameter. */
OPND_CREATE_INTPTR(fallthrough),
/* Branch direction is 4th parameter. */
opnd_create_reg(dir),
/* User defined data is 5th parameter. */
opnd_is_null(user_data) ? OPND_CREATE_INT32(0) : user_data);
} else {
dr_insert_clean_call_ex(
drcontext, ilist, instr, callee,
/* Many users will ask for mcontexts; some will set; it doesn't seem worth
* asking the user to pass in a flag: if they're using this they are not
* super concerned about overhead.
*/
DR_CLEANCALL_READS_APP_CONTEXT | DR_CLEANCALL_WRITES_APP_CONTEXT, 3,
/* Address of cbr is 1st parameter. */
OPND_CREATE_INTPTR(address),
/* Target is 2nd parameter. */
OPND_CREATE_INTPTR(target),
/* Branch direction is 3rd parameter. */
opnd_create_reg(dir));
}

/* Restore state */
if (opc == OP_cbnz || opc == OP_cbz) {
/* Restore arith flags. */
dr_restore_arith_flags_from_reg(dcontext, ilist, instr, flags);
/* Restore old value of flags register. */
dr_restore_reg(dcontext, ilist, instr, flags, SPILL_SLOT_2);
/* Restore old value of dir register. */
dr_restore_reg(dcontext, ilist, instr, dir, SPILL_SLOT_1);
} else if (opc == OP_bcond || opc == OP_tbnz || opc == OP_tbz) {
/* Restore old value of dir register. */
dr_restore_reg(dcontext, ilist, instr, dir, SPILL_SLOT_1);
} else {
CLIENT_ASSERT(false, "unknown conditional branch type");
}
#endif /* X86/ARM/RISCV64/AARCH64 */
}

DR_API void
Expand Down
8 changes: 5 additions & 3 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2419,13 +2419,15 @@ if (X86) # FIXME i#1551, i#1569: port to ARM and AArch64
client-interface/cleancall.c "" "-thread_private -prof_pcs -opt_cleancall 0" "")
endif (NOT X64)
endif (UNIX)
tobuild_ci(client.syscall client-interface/syscall.c "" "-no_follow_children" "")
tobuild_ci(client.count-bbs client-interface/count-bbs.c "" "" "")
endif (X86)
if (X86 OR AARCH64)
tobuild_ci(client.count-ctis client-interface/count-ctis.c "" "" "")
# check dr_insert_cbr_instrumentation with out-of-line clean call
torunonly_ci(client.count-ctis-noopt client.count-ctis client.count-ctis.dll
client-interface/count-ctis.c "" "-opt_cleancall 0" "")
tobuild_ci(client.syscall client-interface/syscall.c "" "-no_follow_children" "")
tobuild_ci(client.count-bbs client-interface/count-bbs.c "" "" "")
endif (X86)
endif (X86 OR AARCH64)
if (NOT RISCV64) # TODO i#3544: Port tests to RISC-V 64
tobuild_ci(client.cleancallparams client-interface/cleancallparams.c "" "" "")
tobuild_ci(client.app_inscount client-interface/app_inscount.c "" "" "")
Expand Down
31 changes: 31 additions & 0 deletions suite/tests/client-interface/count-ctis.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ main()
#else /* asm code *************************************************************/
# include "asm_defines.asm"
/* clang-format off */
#ifdef X86
START_FILE

#define FUNCNAME test_jecxz
Expand Down Expand Up @@ -95,5 +96,35 @@ GLOBAL_LABEL(FUNCNAME:)


END_FILE
#elif defined(AARCH64)
START_FILE

#define FUNCNAME test_jecxz
DECLARE_FUNC(FUNCNAME)
GLOBAL_LABEL(FUNCNAME:)
mov x1, ARG1
END_PROLOG

/* test cbnz */
mov x1, 0
cbz x1, jecxz_taken
nop
jecxz_taken:
mov x1, 1
cbz x1, jecxz_taken
nop
jecxz_not_taken:
nop

/* test x0 being preserved */
ldr x1, =0xabcd1234
str x1, [x0]

ret
END_FUNC(FUNCNAME)


END_FILE
#endif
/* clang-format on */
#endif
Loading