-
Notifications
You must be signed in to change notification settings - Fork 571
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
i#1569 AArch64: Implement trace optimisation. #2442
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -107,6 +107,8 @@ insert_exit_stub_other_flags(dcontext_t *dcontext, fragment_t *f, | |
/* FIXME i#1575: coarse-grain NYI on ARM */ | ||
ASSERT_NOT_IMPLEMENTED(!TEST(FRAG_COARSE_GRAIN, f->flags)); | ||
if (LINKSTUB_DIRECT(l_flags)) { | ||
/* We put a NOP here for future linking. */ | ||
*pc++ = 0xd503201f; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use a named constant |
||
/* stp x0, x1, [x(stolen), #(offs)] */ | ||
*pc++ = (0xa9000000 | 0 | 1 << 10 | (dr_reg_stolen - DR_REG_X0) << 5 | | ||
TLS_REG0_SLOT >> 3 << 15); | ||
|
@@ -119,6 +121,8 @@ insert_exit_stub_other_flags(dcontext_t *dcontext, fragment_t *f, | |
/* br x1 */ | ||
*pc++ = 0xd61f0000 | 1 << 5; | ||
} else { | ||
/* We put a NOP here for trace building. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why does trace building need a nop? I think there's some code missing from this diff (e.g., fixup_indirect_trace_exit): I can't get a clear picture of what the trace-building strategy is on A64 vs. x86, or of why a nop would help. |
||
*pc++ = 0xd503201f; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use a named constant |
||
/* Stub starts out unlinked. */ | ||
cache_pc exit_target = get_unlinked_entry(dcontext, | ||
EXIT_TARGET_TAG(dcontext, f, l)); | ||
|
@@ -184,9 +188,8 @@ stub_is_patched(fragment_t *f, cache_pc stub_pc) | |
void | ||
unpatch_stub(fragment_t *f, cache_pc stub_pc, bool hot_patch) | ||
{ | ||
/* Restore the stp x0, x1, [x28] instruction. */ | ||
*(uint *)stub_pc = (0xa9000000 | 0 | 1 << 10 | (dr_reg_stolen - DR_REG_X0) << 5 | | ||
TLS_REG0_SLOT >> 3 << 15); | ||
/* Restore the NOP instruction. */ | ||
*(uint *)stub_pc = 0xd503201f; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use a named constant |
||
if (hot_patch) | ||
machine_cache_sync(stub_pc, stub_pc + AARCH64_INSTR_SIZE, true); | ||
} | ||
|
@@ -269,8 +272,14 @@ indirect_linkstub_stub_pc(dcontext_t *dcontext, fragment_t *f, linkstub_t *l) | |
cache_pc cti = EXIT_CTI_PC(f, l); | ||
if (!EXIT_HAS_STUB(l->flags, f->flags)) | ||
return NULL; | ||
ASSERT(decode_raw_is_jmp(dcontext, cti)); | ||
return decode_raw_jmp_target(dcontext, cti); | ||
if (decode_raw_is_jmp(dcontext, cti)) | ||
return decode_raw_jmp_target(dcontext, cti); | ||
/* In trace, we might have cbz/cbnz to indirect linkstubs. */ | ||
if (decode_raw_is_cond_branch_zero(dcontext, cti)) | ||
return decode_raw_cond_branch_zero_target(dcontext, cti); | ||
/* There should be no other types of branch to linkstubs. */ | ||
ASSERT_NOT_REACHED(); | ||
return NULL; | ||
} | ||
|
||
cache_pc | ||
|
@@ -343,12 +352,25 @@ insert_fragment_prefix(dcontext_t *dcontext, fragment_t *f) | |
/* Always use prefix on AArch64 as there is no load to PC. */ | ||
byte *pc = (byte *)f->start_pc; | ||
ASSERT(f->prefix_size == 0); | ||
/* ldr x0, [x(stolen), #(off)] */ | ||
*(uint *)pc = (0xf9400000 | (ENTRY_PC_REG - DR_REG_X0) | | ||
(dr_reg_stolen - DR_REG_X0) << 5 | | ||
ENTRY_PC_SPILL_SLOT >> 3 << 10); | ||
pc += 4; | ||
f->prefix_size = (byte)(((cache_pc)pc) - f->start_pc); | ||
if (use_ibt_prefix(f->flags)) { | ||
/* ldr x0, [x(stolen), #(off)] */ | ||
*(uint *)pc = (0xf9400000 | (ENTRY_PC_REG - DR_REG_X0) | | ||
(dr_reg_stolen - DR_REG_X0) << 5 | | ||
ENTRY_PC_SPILL_SLOT >> 3 << 10); | ||
pc += 4; | ||
f->prefix_size = (byte)((cache_pc)pc - f->start_pc); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code looks identical to the else branch below, which in turn looks identical to the original code. So the original was sufficient and there's nothing to change here, right? |
||
} else { | ||
#ifdef CLIENT_INTERFACE | ||
/* FIXME i#1569 */ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does this FIXME refer to? Is there something incomplete about the code below? (i#1569 is very general, so it doesn't help. The comment should explain.) |
||
#endif | ||
/* ldr x0, [x(stolen), #(off)] */ | ||
*(uint *)pc = (0xf9400000 | (ENTRY_PC_REG - DR_REG_X0) | | ||
(dr_reg_stolen - DR_REG_X0) << 5 | | ||
ENTRY_PC_SPILL_SLOT >> 3 << 10); | ||
pc += 4; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s/4/AARCH64_INSTR_SIZE/ |
||
f->prefix_size = (byte)((cache_pc)pc - f->start_pc); | ||
} | ||
/* Make sure emitted size matches size we requested. */ | ||
ASSERT(f->prefix_size == fragment_prefix_size(f->flags)); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1328,6 +1328,11 @@ update_indirect_exit_stub(dcontext_t *dcontext, fragment_t *f, linkstub_t *l) | |
int | ||
fragment_prefix_size(uint flags) | ||
{ | ||
#ifdef AARCH64 | ||
/* For AArch64, there is no need to save the flags | ||
* so we always have the same ibt prefix. */ | ||
return fragment_ibt_prefix_size(flags); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this code needs to be here: use_ibt_prefix should always be true for AARCH64, right? So this routine was already returning fragment_ibt_prefix_size(flags). It is cleaner not to have an ifdef. |
||
#endif | ||
if (use_ibt_prefix(flags)) { | ||
return fragment_ibt_prefix_size(flags); | ||
} else { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -295,7 +295,7 @@ typedef enum _dr_pred_type_t { | |
#define PREFIX_PRED_BITS 5 | ||
#define PREFIX_PRED_BITPOS (32 - PREFIX_PRED_BITS) | ||
#define PREFIX_PRED_MASK \ | ||
(((1 << PREFIX_PRED_BITS)-1) << PREFIX_PRED_BITPOS) /*0xf8000000 */ | ||
((((uint)1 << PREFIX_PRED_BITS)-1) << PREFIX_PRED_BITPOS) /*0xf8000000 */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or just 1U |
||
/* DR_API EXPORT BEGIN */ | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -705,7 +705,8 @@ instr_get_predicate(instr_t *instr) | |
instr_t * | ||
instr_set_predicate(instr_t *instr, dr_pred_type_t pred) | ||
{ | ||
instr->prefixes |= ((pred << PREFIX_PRED_BITPOS) & PREFIX_PRED_MASK); | ||
instr->prefixes = ((instr->prefixes & ~PREFIX_PRED_MASK ) | | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks incorrect: I think there are other bits in prefixes besides predicates, which this routine should not clobber. |
||
((pred << PREFIX_PRED_BITPOS) & PREFIX_PRED_MASK)); | ||
return instr; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But linking was already working fine, so it is unclear why we'd need a nop now. Please elaborate in the comment -- maybe best in the pre-function comment. Including an asm listing would also be helpful (especially with this stub getting very large: 8 instructions now). This is unusual.