Skip to content

Commit 1a1fdf3

Browse files
committed
rt: Add lots of documentation to __morestack
1 parent 7359fa4 commit 1a1fdf3

File tree

2 files changed

+139
-42
lines changed

2 files changed

+139
-42
lines changed

src/rt/arch/i386/morestack.S

Lines changed: 116 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,70 @@
1-
.text
2-
3-
// __morestack
4-
//
5-
// LLVM generates a call to this to allocate more stack space in a function
6-
// prolog when we run out.
1+
/*
2+
__morestack
3+
4+
This function implements stack growth using the mechanism
5+
devised by Ian Lance Taylor for gccgo, described here:
6+
7+
http://gcc.gnu.org/wiki/SplitStacks
8+
9+
The Rust stack is composed of a linked list of stack segments,
10+
and each stack segment contains two parts: the work area,
11+
where Rust functions are allowed to execute; and the red zone,
12+
where no Rust code can execute, but where short runtime
13+
functions (including __morestack), the dynamic linker, signal
14+
handlers, and the unwinder can run.
15+
16+
Each Rust function contains an LLVM-generated prologue that
17+
compares the stack space required for the current function to
18+
the space remaining in the current stack segment,
19+
maintained in a platform-specific TLS slot. The stack limit
20+
is strategically maintained by the Rust runtime so that it is
21+
always in place whenever a Rust function is running.
22+
23+
When there is not enough room to run the function, the function
24+
prologue makes a call to __morestack to allocate a new stack
25+
segment, copy any stack-based arguments to it, switch stacks,
26+
then resume execution of the original function.
27+
28+
-- The __morestack calling convention --
29+
30+
For reasons of efficiency the __morestack calling convention
31+
is bizarre. The calling function does not attempt to align the
32+
stack for the call, and on x86_64 the arguments to __morestack
33+
are passed in scratch registers in order to preserve the
34+
original function's arguments.
35+
36+
Once __morestack has switched to the new stack, instead of
37+
returning, it then calls into the original function, resuming
38+
execution at the instruction following the call to
39+
__morestack. Thus, when the original function returns it
40+
actually returns to __morestack, which then deallocates the
41+
stack and returns again to the original function's caller.
42+
43+
-- Unwinding --
44+
45+
All this trickery causes hell when it comes time for the
46+
unwinder to navigate its way through this function. What
47+
will happen is the original function will be unwound first
48+
without any special effort, then the unwinder encounters
49+
the __morestack frame, which is sitting just above a
50+
tiny fraction of a frame (containing just a return pointer
51+
and, on 32-bit, the arguments to __morestack).
52+
53+
We deal with this by claiming that that little bit of stack
54+
is actually part of the __morestack frame, encoded as
55+
DWARF call frame instructions (CFI) by .cfi assembler
56+
pseudo-ops.
57+
58+
One final complication (that took me a week to figure out)
59+
is that OS X 10.6+ uses its own 'compact unwind info',
60+
an undocumented format generated by the linker from
61+
the DWARF CFI. This compact unwind info doesn't correctly
62+
capture the nuance of the __morestack frame, and as a
63+
result all of our linking on OS X uses the -no_compact_unwind
64+
flag.
65+
*/
66+
67+
.text
768

869
#if defined(__APPLE__)
970
#define RUST_GET_TASK L_rust_get_task$stub
@@ -51,13 +112,31 @@ MORESTACK:
51112
.cfi_startproc
52113
#endif
53114

115+
// This base pointer setup differs from most in that we are
116+
// telling the unwinder to consider the Canonical Frame
117+
// Address (CFA) for this frame to be the value of the stack
118+
// pointer prior to entry to the original function, whereas
119+
// the CFA would typically be the value of the stack
120+
// pointer prior to entry to this function. This will allow
121+
// the unwinder to understand how to skip the tiny partial
122+
// frame that the original function created by calling
123+
// __morestack.
124+
125+
// In practical terms, our CFA is 12 bytes greater than it
126+
// would normally be, accounting for the two arguments to
127+
// __morestack, and an extra return address.
128+
54129
pushl %ebp
55130
#if defined(__linux__) || defined(__APPLE__)
131+
// The CFA is 20 bytes above the register that it is
132+
// associated with for this frame (which will be %ebp)
56133
.cfi_def_cfa_offset 20
134+
// %ebp is -20 bytes from the CFA
57135
.cfi_offset %ebp, -20
58136
#endif
59137
movl %esp, %ebp
60138
#if defined(__linux__) || defined(__APPLE__)
139+
// Calculate the CFA as an offset from %ebp
61140
.cfi_def_cfa_register %ebp
62141
#endif
63142

@@ -81,17 +160,25 @@ MORESTACK:
81160

82161
// Save the correct %esp value for our grandparent frame,
83162
// for the unwinder
163+
// FIXME: This isn't used
84164
leal 20(%ebp), %eax
85165
movl %eax, -4(%ebp)
86166

87-
// The arguments to rust_new_stack2
88-
movl 56(%esp),%eax // Size of stack arguments
167+
// The arguments to upcall_new_stack
168+
169+
// The size of the stack arguments to copy to the new stack,
170+
// one of the arguments to __morestack
171+
movl 56(%esp),%eax
89172
movl %eax,20(%esp)
90-
leal 64(%esp),%eax // Address of stack arguments
173+
// The address of the stack arguments to the original function
174+
leal 64(%esp),%eax
91175
movl %eax,16(%esp)
176+
// The amount of stack needed for the original function,
177+
// the other argument to __morestack
92178
movl 52(%esp),%eax // The amount of stack needed
93179
movl %eax,12(%esp)
94-
movl $0, 8(%esp) // Out pointer
180+
// Out pointer to the new stack
181+
movl $0, 8(%esp)
95182

96183
#ifdef __APPLE__
97184
call 1f
@@ -106,18 +193,22 @@ MORESTACK:
106193
movl %eax,(%esp)
107194
call UPCALL_CALL_C
108195

109-
movl 48(%esp),%eax // Grab the return pointer.
110-
inc %eax // Skip past the ret instruction in the parent fn
196+
// Grab the __morestack return pointer
197+
movl 48(%esp),%eax
198+
// Skip past the ret instruction in the parent fn
199+
inc %eax
111200

112-
// Restore fastcc arguments
201+
// Restore the fastcc arguments to the original function
113202
movl 28(%esp), %ecx
114203
movl 24(%esp), %edx
115204

116-
movl 8(%esp),%esp // Switch stacks.
117-
call *%eax // Re-enter the function that called us.
205+
// Switch stacks
206+
movl 8(%esp),%esp
207+
// Re-enter the function that called us
208+
call *%eax
118209

119-
// Now the function that called us has returned, so we need to delete the
120-
// old stack space.
210+
// Now the function that called us has returned, so we need to
211+
// delete the old stack space
121212

122213
// Switch back to the rust stack
123214
movl %ebp, %esp
@@ -127,8 +218,8 @@ MORESTACK:
127218
subl $4, %esp
128219

129220
// Now that we're on the return path we want to avoid
130-
// stomping on %eax. FIXME: Need to save and restore
131-
// eax to actually preserve it across the call to delete the stack
221+
// stomping on %eax. FIXME: Need to save and restore %eax to
222+
// actually preserve it across the call to delete the stack
132223
#ifdef __APPLE__
133224
call 1f
134225
1: popl %ecx
@@ -144,8 +235,14 @@ MORESTACK:
144235
addl $12,%esp
145236

146237
popl %ebp
238+
239+
// FIXME: I don't think these rules are necessary
240+
// since the unwinder should never encounter an instruction
241+
// pointer pointing here.
147242
#if defined(__linux__) || defined(__APPLE__)
243+
// Restore the rule for how to find %ebp
148244
.cfi_restore %ebp
245+
// Tell the unwinder how to find the CFA in terms of %esp
149246
.cfi_def_cfa %esp, 16
150247
#endif
151248
retl $8

src/rt/arch/x86_64/morestack.S

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
.text
1+
/*
2+
__morestack
23
3-
// __morestack
4-
//
5-
// LLVM generates a call to this to allocate more stack space in a functiono
6-
// prolog when we run out.
4+
See i386/morestack.S for the lengthy, general explanation.
5+
*/
6+
7+
.text
78

89
#if defined(__APPLE__) || defined(_WIN32)
910
#define UPCALL_NEW_STACK _upcall_new_stack
@@ -15,27 +16,13 @@
1516
#define UPCALL_DEL_STACK upcall_del_stack
1617
#define UPCALL_CALL_C upcall_call_shim_on_c_stack
1718
#define MORESTACK __morestack
18-
#endif
19-
20-
// Naturally, nobody can agree as to
21-
// which arguments should go in which
22-
// registers:
23-
#if defined(_WIN32)
24-
# define ARG0 %rcx
25-
# define ARG1 %rdx
26-
# define ARG2 %r8
27-
#else
28-
# define ARG0 %rdi
29-
# define ARG1 %rsi
30-
# define ARG2 %rdx
3119
#endif
3220

3321
.globl UPCALL_NEW_STACK
3422
.globl UPCALL_DEL_STACK
3523
.globl UPCALL_CALL_C
3624
.globl MORESTACK
3725

38-
// FIXME: What about _WIN32?
3926
#if defined(__linux__)
4027
.hidden MORESTACK
4128
#else
@@ -48,24 +35,31 @@
4835
.type MORESTACK,@function
4936
#endif
5037

38+
5139
#if defined(__linux__) || defined(__APPLE__)
5240
MORESTACK:
5341
.cfi_startproc
54-
55-
// Set up a normal backtrace
42+
5643
pushq %rbp
44+
// The CFA is 24 bytes above the register that it will
45+
// be associated with for this frame (%rbp). That is 8
46+
// bytes greater than a normal frame, to allow the unwinder
47+
// to skip the partial frame of the original function.
5748
.cfi_def_cfa_offset 24
49+
// %rbp is -24 bytes from the CFA
5850
.cfi_offset %rbp, -24
5951
movq %rsp, %rbp
52+
// Calculate the CFA as an offset from %rbp
6053
.cfi_def_cfa_register %rbp
6154

6255
// Save the grandparent stack pointer for the unwinder
56+
// FIXME: This isn't used
6357
leaq 24(%rbp), %rax
6458
pushq %rax
6559

6660
// FIXME: libgcc also saves rax. not sure if we need to
6761

68-
// Save argument registers
62+
// Save argument registers of the original function
6963
pushq %rdi
7064
pushq %rsi
7165
pushq %rdx
@@ -79,6 +73,8 @@ MORESTACK:
7973
movq %rbp, %rcx
8074
addq $24, %rcx // Base pointer, return address x2
8175

76+
// The arguments to __morestack are passed in %r10 & %r11
77+
8278
pushq %r11 // Size of stack arguments
8379
pushq %rcx // Address of stack arguments
8480
pushq %r10 // The amount of stack needed
@@ -119,7 +115,8 @@ MORESTACK:
119115

120116
// Align the stack again
121117
pushq $0
122-
118+
119+
// FIXME: Should preserve %rax here
123120
movq UPCALL_DEL_STACK@GOTPCREL(%rip), %rsi
124121
movq $0, %rdi
125122
#ifdef __APPLE__
@@ -131,6 +128,9 @@ MORESTACK:
131128

132129
addq $8, %rsp
133130
popq %rbp
131+
// FIXME: I don't think these rules are necessary
132+
// since the unwinder should never encounter an instruction
133+
// pointer pointing here.
134134
.cfi_restore %rbp
135135
.cfi_def_cfa %rsp, 16
136136
ret

0 commit comments

Comments
 (0)