Skip to content

Commit

Permalink
i#3699 ARM: Add padding to priv_mcontext_t for 8-byte alignment. (#7191)
Browse files Browse the repository at this point in the history
ef4482e caused a few additional failures. In particular, the child of
a clone3 system call started with the register values shifted: R2 got
the value that should be in R1 and so on. That caused the test
linux.clone to fail.

It turns out that some callers of insert_{push,pop}_all_registers expect
that an unpadded priv_mcontext_t will be pushed/popped. Moving the
4-byte padding to above the struct instead of below it makes some tests
pass but breaks others. So we add some padding in the middle of
priv_mcontext_t, changing its size from 324 to 328. It will probably run
slightly faster with the struct (and particularly the field "simd")
8-byte aligned.

Update insert_{push,pop}_all_registers and arm.asm for the new struct
layout, with some additional changes to arm.asm:

+ Delete irrelevant X64 macros.

+ Document the macros more clearly (they can be found with grep).

+ Change SP only at start and end of function (standard practice, makes
debugging easier).

+ Avoid writing below SP (compatible with signal handlers).

This change breaks compatibility so the version is increased to 11.90.

Issue: #3699
  • Loading branch information
egrimley-arm authored Jan 17, 2025
1 parent d3a91d6 commit 2dfd7cb
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 72 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/ci-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -196,7 +196,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -285,7 +285,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -374,7 +374,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -457,7 +457,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -541,7 +541,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER="11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export VERSION_NUMBER="11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export PREFIX="cronbuild-"
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn")

# N.B.: When updating this, update all the default versions in ci-package.yml
# and ci-docs.yml. We should find a way to share (xref i#1565).
set(VERSION_NUMBER_DEFAULT "11.3.${VERSION_NUMBER_PATCHLEVEL}")
set(VERSION_NUMBER_DEFAULT "11.90.${VERSION_NUMBER_PATCHLEVEL}")
# do not store the default VERSION_NUMBER in the cache to prevent a stale one
# from preventing future version updates in a pre-existing build dir
set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default")
Expand Down
4 changes: 3 additions & 1 deletion api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ clients.

The changes between version \DR_VERSION and 11.3.0 include the following compatibility
changes:
- No compatibility changes yet.
- The size of #dr_mcontext_t on 32-bit Arm has been increased by 4 so that
the struct can be pushed onto the 8-byte aligned stack without additional
padding. The offset of the field "simd" has changed.

Further non-compatibility-affecting changes include:
- Added support for reading a single drmemtrace trace file from stdin
Expand Down
20 changes: 10 additions & 10 deletions core/arch/aarchxx/mangle.c
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,11 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci,
ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS);
ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment()));

/* padding */
PRE(ilist, instr,
XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ)));
dstack_offs += XSP_SZ;

/* pc and aflags */
if (cci->skip_save_flags) {
/* even if we skip flag saves we want to keep mcontext shape */
Expand Down Expand Up @@ -642,7 +647,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci,
if (spill)
PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot));
}
ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment()));

/* We rely on dr_get_mcontext_priv() to fill in the app's stolen reg value
* and sp value.
Expand All @@ -668,12 +672,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci,
DR_REG_LIST_LENGTH_ARM, DR_REG_LIST_ARM));
dstack_offs += DR_REG_LIST_LENGTH_ARM * XSP_SZ;
}
/* Make dstack_offs 8-byte aligned as we have just pushed an odd
* number of 4-byte registers.
*/
PRE(ilist, instr,
XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ)));
dstack_offs += XSP_SZ;
ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment()));

#endif
Expand Down Expand Up @@ -779,9 +777,6 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist
}

#else
/* This undoes the XINST_CREATE_sub done for alignment in XINST_CREATE_sub. */
PRE(ilist, instr,
XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ)));
/* We rely on dr_set_mcontext_priv() to set the app's stolen reg value,
* and the stack swap to set the sp value: we assume the stolen reg on
* the stack still has our TLS base in it.
Expand Down Expand Up @@ -816,6 +811,11 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist
OPND_CREATE_INT_MSR_NZCVQG(), opnd_create_reg(scratch)));
PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot));
}

/* padding */
PRE(ilist, instr,
XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ)));

/* FIXME i#1551: once we have cci->num_simd_skip, skip this if possible */
PRE(ilist, instr,
INSTR_CREATE_vldm_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN,
Expand Down
107 changes: 54 additions & 53 deletions core/arch/arm/arm.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/* **********************************************************
* Copyright (c) 2014-2022 Google, Inc. All rights reserved.
* Copyright (c) 2025 Arm Limited. All rights reserved.
* ********************************************************** */

/*
Expand Down Expand Up @@ -54,31 +55,17 @@ DECL_EXTERN(initstack_mutex)
#define SAVE_TO_DCONTEXT_VIA_REG(reg,offs,src) str src, PTRSZ [reg, POUND (offs)]

/* offsetof(dcontext_t, dstack) */
#define dstack_OFFSET 0x16c
#define dstack_OFFSET 0x170
/* offsetof(dcontext_t, is_exiting) */
#define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ)

#ifdef X64
# define MCXT_NUM_SIMD_SLOTS 32
# define SIMD_REG_SIZE 64
# define MCXT_NUM_PRED_SLOTS 17 /* P regs + FFR */
# define PRED_REG_SIZE 8
# define NUM_GPR_SLOTS 33 /* incl flags */
# define GPR_REG_SIZE 8
#else
# define MCXT_NUM_SIMD_SLOTS 16
# define SIMD_REG_SIZE 16
# define MCXT_NUM_PRED_SLOTS 0
# define PRED_REG_SIZE 0
# define NUM_GPR_SLOTS 17 /* incl flags */
# define GPR_REG_SIZE 4
#endif
#define PRE_SIMD_PADDING 0
#define PRIV_MCXT_SIMD_SIZE (PRE_SIMD_PADDING + MCXT_NUM_SIMD_SLOTS*SIMD_REG_SIZE \
+ MCXT_NUM_PRED_SLOTS*PRED_REG_SIZE)
#define PRIV_MCXT_SIZE (NUM_GPR_SLOTS*GPR_REG_SIZE + PRIV_MCXT_SIMD_SIZE)
#define PRIV_MCXT_SP_FROM_SIMD (-(4*GPR_REG_SIZE)) /* flags, pc, lr, then sp */
#define PRIV_MCXT_PC_FROM_SIMD (-(2*GPR_REG_SIZE)) /* flags, then pc */
/* The struct priv_mcontext_t is defined in mcxtx_api.h. */
#define PRIV_MCXT_SIZE 0x148 /* sizeof(priv_mcontext_t) */
#define PRIV_MCXT_SP_OFFSET 0x34 /* offsetof(priv_mcontext_t, sp) */
#define PRIV_MCXT_LR_OFFSET 0x38 /* offsetof(priv_mcontext_t, lr) */
#define PRIV_MCXT_PC_OFFSET 0x3c /* offsetof(priv_mcontext_t, pc) */
#define PRIV_MCXT_CPSR_OFFSET 0x40 /* offsetof(priv_mcontext_t, cpsr) */
#define PRIV_MCXT_SIMD_OFFSET 0x48 /* offsetof(priv_mcontext_t, simd) */

#ifndef UNIX
# error Non-Unix is not supported
Expand Down Expand Up @@ -181,24 +168,31 @@ GLOBAL_LABEL(dr_call_on_clean_stack:)
#ifdef DR_APP_EXPORTS
DECLARE_EXPORTED_FUNC(dr_app_start)
GLOBAL_LABEL(dr_app_start:)
push {lr}
vstmdb sp!, {d16-d31}
vstmdb sp!, {d0-d15}
mrs REG_R0, cpsr /* r0 is scratch */
push {REG_R0}
/* We can't push all regs w/ writeback */
stmdb sp, {REG_R0-r15}
str lr, [sp, #(PRIV_MCXT_PC_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */
/* we need the sp at function entry */
mov REG_R0, sp
add REG_R0, REG_R0, #(PRIV_MCXT_SIMD_SIZE + 8) /* offset simd,cpsr,lr */
str REG_R0, [sp, #(PRIV_MCXT_SP_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */
sub sp, sp, #(PRIV_MCXT_SIZE-PRIV_MCXT_SIMD_SIZE-4) /* simd,cpsr */
mov REG_R0, sp
#if PRIV_MCXT_SIZE % 8 != 0
# error Size of priv_mcontext_t should be 8-byte aligned.
#endif
/* space for mcontext + padding + LR (stack must be 8-byte aligned */
sub sp, sp, #(PRIV_MCXT_SIZE + 8)
str lr, [sp, #(PRIV_MCXT_SIZE + 4)]
#if PRIV_MCXT_R0_OFFSET != 0
# error
#endif
stmia sp, {r0-r12}
add r0, sp, #(PRIV_MCXT_SIZE + 8) /* get SP at function entry */
str r0, [sp, #PRIV_MCXT_SP_OFFSET]
str lr, [sp, #PRIV_MCXT_LR_OFFSET]
str lr, [sp, #PRIV_MCXT_PC_OFFSET] /* save LR as PC */
mrs r0, cpsr
str r0, [sp, #PRIV_MCXT_CPSR_OFFSET]
add r0, sp, #PRIV_MCXT_SIMD_OFFSET
vstmia r0!, {d0-d15}
vstmia r0!, {d16-d31}
mov r0, sp
CALLC1(GLOBAL_REF(dr_app_start_helper), REG_R0)
/* if we get here, DR is not taking over */
ldr lr, [sp, #(PRIV_MCXT_SIZE + 4)]
add sp, sp, #PRIV_MCXT_SIZE
pop {pc}
bx lr
END_FUNC(dr_app_start)

/*
Expand Down Expand Up @@ -230,25 +224,32 @@ GLOBAL_LABEL(dr_app_running_under_dynamorio:)
*/
DECLARE_EXPORTED_FUNC(dynamorio_app_take_over)
GLOBAL_LABEL(dynamorio_app_take_over:)
push {lr}
vstmdb sp!, {d16-d31}
vstmdb sp!, {d0-d15}
mrs REG_R0, cpsr /* r0 is scratch */
push {REG_R0}
/* We can't push all regs w/ writeback */
stmdb sp, {REG_R0-r15}
str lr, [sp, #(PRIV_MCXT_PC_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */
/* we need the sp at function entry */
mov REG_R0, sp
add REG_R0, REG_R0, #(PRIV_MCXT_SIMD_SIZE + 8) /* offset simd,cpsr,lr */
str REG_R0, [sp, #(PRIV_MCXT_SP_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */
sub sp, sp, #(PRIV_MCXT_SIZE-PRIV_MCXT_SIMD_SIZE-4) /* simd,cpsr */
mov REG_R0, sp
#if PRIV_MCXT_SIZE % 8 != 0
# error Size of priv_mcontext_t should be 8-byte aligned.
#endif
/* space for mcontext + padding + LR (stack must be 8-byte aligned */
sub sp, sp, #(PRIV_MCXT_SIZE + 8)
str lr, [sp, #(PRIV_MCXT_SIZE + 4)]
#if PRIV_MCXT_R0_OFFSET != 0
# error
#endif
stmia sp, {r0-r12}
add r0, sp, #(PRIV_MCXT_SIZE + 8) /* get SP at function entry */
str r0, [sp, #PRIV_MCXT_SP_OFFSET]
str lr, [sp, #PRIV_MCXT_LR_OFFSET]
str lr, [sp, #PRIV_MCXT_PC_OFFSET] /* save LR as PC */
mrs r0, cpsr
str r0, [sp, #PRIV_MCXT_CPSR_OFFSET]
add r0, sp, #PRIV_MCXT_SIMD_OFFSET
vstmia r0!, {d0-d15}
vstmia r0!, {d16-d31}
mov r0, sp
CALLC1(GLOBAL_REF(dynamorio_app_take_over_helper), REG_R0)
/* if we get here, DR is not taking over */
ldr lr, [sp, #(PRIV_MCXT_SIZE + 4)]
add sp, sp, #PRIV_MCXT_SIZE
pop {pc}
END_FUNC(dynamorio_app_take_over)
bx lr
END_FUNC(dr_app_start)


/*
Expand Down
1 change: 1 addition & 0 deletions core/lib/mcxtx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
uint apsr; /**< The application program status registers in AArch32. */
uint cpsr; /**< The current program status registers in AArch32. */
}; /**< The anonymous union of alternative names for apsr/cpsr register. */
byte padding[4]; /**< The padding to get simd field 8-byte aligned. */
# endif /* 64/32-bit */

# ifdef X64 /* 64-bit */
Expand Down

0 comments on commit 2dfd7cb

Please sign in to comment.