Skip to content

Commit d4a3466

Browse files
committed
runtime: write trace stack tab directly to trace buffer
Currently, the stack frame of (*traceStackTable).dump is 68KiB. We're about to move (*traceStackTable).dump to the system stack, where we often don't have this much room.

5140 bytes of this is an on-stack temporary buffer for constructing potentially large trace events before copying these out to the actual trace buffer.

Reduce the stack frame size by writing these events directly to the trace buffer rather than temporary space. This introduces a couple complications:

- The trace event starts with a varint encoding the event payload's length in bytes. These events are large and somewhat complicated, so it's hard to know the size ahead of time. That's not a problem with the temporary buffer because we can just construct the event and see how long it is. In order to support writing directly to the trace buffer, we reserve enough bytes for a maximum size varint and add support for populating a reserved space after the fact.

- Emitting a stack event calls traceFrameForPC, which can itself emit string events. If these were emitted in the middle of the stack event, it would corrupt the stream. We already allocate a []Frame to convert the PC slice to frames, and then convert each Frame into a traceFrame with trace string IDs, so we address this by combining these two steps into one so that all trace string events are emitted before we start constructing the stack event.

For #53979.

Change-Id: Ie60704be95199559c426b551f8e119b14e06ddac
Reviewed-on: https://go-review.googlesource.com/c/go/+/422954
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
1 parent db84f53 commit d4a3466

File tree

1 file changed

+53
-23
lines changed

1 file changed

+53
-23
lines changed

Diff for: src/runtime/trace.go

+53-23
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,22 @@ func (buf *traceBuf) varint(v uint64) {
935935
buf.pos = pos
936936
}
937937

938+
// varintAt writes varint v at byte position pos in buf. This always
939+
// consumes traceBytesPerNumber bytes. This is intended for when the
940+
// caller needs to reserve space for a varint but can't populate it
941+
// until later.
942+
func (buf *traceBuf) varintAt(pos int, v uint64) {
943+
for i := 0; i < traceBytesPerNumber; i++ {
944+
if i < traceBytesPerNumber-1 {
945+
buf.arr[pos] = 0x80 | byte(v)
946+
} else {
947+
buf.arr[pos] = byte(v)
948+
}
949+
v >>= 7
950+
pos++
951+
}
952+
}
953+
938954
// byte appends v to buf.
939955
func (buf *traceBuf) byte(v byte) {
940956
buf.arr[buf.pos] = v
@@ -1024,48 +1040,60 @@ func (tab *traceStackTable) newStack(n int) *traceStack {
10241040
return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize))
10251041
}
10261042

1027-
// allFrames returns all of the Frames corresponding to pcs.
1028-
func allFrames(pcs []uintptr) []Frame {
1029-
frames := make([]Frame, 0, len(pcs))
1043+
// traceFrames returns the frames corresponding to pcs. It may
1044+
// allocate and may emit trace events.
1045+
func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
1046+
frames := make([]traceFrame, 0, len(pcs))
10301047
ci := CallersFrames(pcs)
10311048
for {
1049+
var frame traceFrame
10321050
f, more := ci.Next()
1033-
frames = append(frames, f)
1051+
frame, bufp = traceFrameForPC(bufp, 0, f)
1052+
frames = append(frames, frame)
10341053
if !more {
1035-
return frames
1054+
return frames, bufp
10361055
}
10371056
}
10381057
}
10391058

10401059
// dump writes all previously cached stacks to trace buffers,
10411060
// releases all memory and resets state.
10421061
func (tab *traceStackTable) dump() {
1043-
var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte
10441062
bufp := traceFlush(0, 0)
10451063
for _, stk := range tab.tab {
10461064
stk := stk.ptr()
10471065
for ; stk != nil; stk = stk.link.ptr() {
1048-
tmpbuf := tmp[:0]
1049-
tmpbuf = traceAppend(tmpbuf, uint64(stk.id))
1050-
frames := allFrames(stk.stack())
1051-
tmpbuf = traceAppend(tmpbuf, uint64(len(frames)))
1052-
for _, f := range frames {
1053-
var frame traceFrame
1054-
frame, bufp = traceFrameForPC(bufp, 0, f)
1055-
tmpbuf = traceAppend(tmpbuf, uint64(f.PC))
1056-
tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID))
1057-
tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID))
1058-
tmpbuf = traceAppend(tmpbuf, uint64(frame.line))
1059-
}
1060-
// Now copy to the buffer.
1061-
size := 1 + traceBytesPerNumber + len(tmpbuf)
1062-
if buf := bufp.ptr(); len(buf.arr)-buf.pos < size {
1066+
var frames []traceFrame
1067+
frames, bufp = traceFrames(bufp, stk.stack())
1068+
1069+
// Estimate the size of this record. This
1070+
// bound is pretty loose, but avoids counting
1071+
// lots of varint sizes.
1072+
maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber
1073+
// Make sure we have enough buffer space.
1074+
if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize {
10631075
bufp = traceFlush(bufp, 0)
10641076
}
1077+
1078+
// Emit header, with space reserved for length.
10651079
buf := bufp.ptr()
10661080
buf.byte(traceEvStack | 3<<traceArgCountShift)
1067-
buf.varint(uint64(len(tmpbuf)))
1068-
buf.pos += copy(buf.arr[buf.pos:], tmpbuf)
1081+
lenPos := buf.pos
1082+
buf.pos += traceBytesPerNumber
1083+
1084+
// Emit body.
1085+
recPos := buf.pos
1086+
buf.varint(uint64(stk.id))
1087+
buf.varint(uint64(len(frames)))
1088+
for _, frame := range frames {
1089+
buf.varint(uint64(frame.PC))
1090+
buf.varint(frame.funcID)
1091+
buf.varint(frame.fileID)
1092+
buf.varint(frame.line)
1093+
}
1094+
1095+
// Fill in size header.
1096+
buf.varintAt(lenPos, uint64(buf.pos-recPos))
10691097
}
10701098
}
10711099

@@ -1079,6 +1107,7 @@ func (tab *traceStackTable) dump() {
10791107
}
10801108

10811109
type traceFrame struct {
1110+
PC uintptr
10821111
funcID uint64
10831112
fileID uint64
10841113
line uint64
@@ -1089,6 +1118,7 @@ type traceFrame struct {
10891118
func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
10901119
bufp := &buf
10911120
var frame traceFrame
1121+
frame.PC = f.PC
10921122

10931123
fn := f.Function
10941124
const maxLen = 1 << 10

0 commit comments

Comments
 (0)