runtime: write trace stack tab directly to trace buffer
Currently, the stack frame of (*traceStackTable).dump is 68KiB. We're
about to move (*traceStackTable).dump to the system stack, where we
often don't have this much room.

Of this, 5140 bytes is an on-stack temporary buffer used to construct
potentially large trace events before they are copied out to the
actual trace buffer.

Reduce the stack frame size by writing these events directly to the
trace buffer rather than to temporary space. This introduces a couple
of complications:

- The trace event starts with a varint encoding the event payload's
  length in bytes. These events are large and somewhat complicated, so
  it's hard to know their size ahead of time. That isn't a problem with
  the temporary buffer, because we can simply construct the event and
  measure it. To support writing directly to the trace buffer, we
  reserve enough bytes for a maximum-size varint and add support for
  populating that reserved space after the fact (see the first sketch
  after this list).

- Emitting a stack event calls traceFrameForPC, which can itself emit
  string events. If those were emitted in the middle of the stack
  event, they would corrupt the stream. We already allocate a []Frame
  to convert the PC slice to frames and then convert each Frame into a
  traceFrame with trace string IDs, so we address this by combining
  those two steps into one, ensuring that all trace string events are
  emitted before we start constructing the stack event (see the second
  sketch after this list).
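
A minimal, standalone sketch of the reserve-then-backfill pattern from
the first point; bytesPerNumber, putVarintAt, and the header byte are
hypothetical stand-ins for the runtime's traceBytesPerNumber and
(*traceBuf).varintAt, not the actual API:

package main

import "fmt"

// bytesPerNumber is the fixed width reserved for one varint slot
// (stand-in for the runtime's traceBytesPerNumber).
const bytesPerNumber = 10

// putVarintAt backfills v into a reserved bytesPerNumber-byte slot at
// pos. All but the last byte carry the continuation bit (0x80), so the
// value always fills the whole slot and an ordinary varint decoder
// still reads the right number.
func putVarintAt(buf []byte, pos int, v uint64) {
	for i := 0; i < bytesPerNumber; i++ {
		if i < bytesPerNumber-1 {
			buf[pos+i] = 0x80 | byte(v)
		} else {
			buf[pos+i] = byte(v)
		}
		v >>= 7
	}
}

func main() {
	buf := []byte{0x25} // hypothetical event header byte

	// Reserve the length slot before the payload size is known.
	lenPos := len(buf)
	buf = append(buf, make([]byte, bytesPerNumber)...)

	// Write a payload whose size we only learn as we go.
	payloadStart := len(buf)
	buf = append(buf, []byte("payload of unknown length")...)

	// Backfill the reserved slot with the now-known payload length.
	putVarintAt(buf, lenPos, uint64(len(buf)-payloadStart))
	fmt.Printf("% x\n", buf)
}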
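
And a toy model of the ordering concern in the second point, with
made-up names (emit, resolve, EvString, EvStack) that only loosely
mirror traceFrameForPC and the stack event; the point is purely that
every frame is resolved, and hence every string event emitted, before
the stack record is opened:

package main

import "fmt"

// frame pairs a PC with a string-table ID, loosely like traceFrame.
type frame struct {
	pc     uint64
	funcID uint64
}

// emit appends one "event" to the stream; events are just strings here.
func emit(stream *[]string, ev string) { *stream = append(*stream, ev) }

// resolve maps a PC to a frame, emitting a string event the first time
// a PC is seen (the way traceFrameForPC may emit string events).
func resolve(stream *[]string, ids map[uint64]uint64, pc uint64) frame {
	id, ok := ids[pc]
	if !ok {
		id = uint64(len(ids) + 1)
		ids[pc] = id
		emit(stream, fmt.Sprintf("EvString id=%d pc=%#x", id, pc))
	}
	return frame{pc: pc, funcID: id}
}

func main() {
	var stream []string
	ids := map[uint64]uint64{}
	pcs := []uint64{0x401000, 0x402000, 0x401000}

	// Pass 1: resolve all frames, so every string event is out first.
	frames := make([]frame, 0, len(pcs))
	for _, pc := range pcs {
		frames = append(frames, resolve(&stream, ids, pc))
	}

	// Pass 2: only now emit the stack record; nothing can interleave.
	emit(stream, fmt.Sprintf("EvStack nframes=%d %v", len(frames), frames))

	for _, ev := range stream {
		fmt.Println(ev)
	}
}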

For #53979.

Change-Id: Ie60704be95199559c426b551f8e119b14e06ddac
Reviewed-on: https://go-review.googlesource.com/c/go/+/422954
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
aclements committed Aug 11, 2022
1 parent db84f53 commit d4a3466
src/runtime/trace.go: 53 additions and 23 deletions
@@ -935,6 +935,22 @@ func (buf *traceBuf) varint(v uint64) {
 	buf.pos = pos
 }
 
+// varintAt writes varint v at byte position pos in buf. This always
+// consumes traceBytesPerNumber bytes. This is intended for when the
+// caller needs to reserve space for a varint but can't populate it
+// until later.
+func (buf *traceBuf) varintAt(pos int, v uint64) {
+	for i := 0; i < traceBytesPerNumber; i++ {
+		if i < traceBytesPerNumber-1 {
+			buf.arr[pos] = 0x80 | byte(v)
+		} else {
+			buf.arr[pos] = byte(v)
+		}
+		v >>= 7
+		pos++
+	}
+}
+
 // byte appends v to buf.
 func (buf *traceBuf) byte(v byte) {
 	buf.arr[buf.pos] = v
@@ -1024,48 +1040,60 @@ func (tab *traceStackTable) newStack(n int) *traceStack {
 	return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize))
 }
 
-// allFrames returns all of the Frames corresponding to pcs.
-func allFrames(pcs []uintptr) []Frame {
-	frames := make([]Frame, 0, len(pcs))
+// traceFrames returns the frames corresponding to pcs. It may
+// allocate and may emit trace events.
+func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
+	frames := make([]traceFrame, 0, len(pcs))
 	ci := CallersFrames(pcs)
 	for {
+		var frame traceFrame
 		f, more := ci.Next()
-		frames = append(frames, f)
+		frame, bufp = traceFrameForPC(bufp, 0, f)
+		frames = append(frames, frame)
 		if !more {
-			return frames
+			return frames, bufp
 		}
 	}
 }
 
 // dump writes all previously cached stacks to trace buffers,
 // releases all memory and resets state.
 func (tab *traceStackTable) dump() {
-	var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte
 	bufp := traceFlush(0, 0)
 	for _, stk := range tab.tab {
 		stk := stk.ptr()
 		for ; stk != nil; stk = stk.link.ptr() {
-			tmpbuf := tmp[:0]
-			tmpbuf = traceAppend(tmpbuf, uint64(stk.id))
-			frames := allFrames(stk.stack())
-			tmpbuf = traceAppend(tmpbuf, uint64(len(frames)))
-			for _, f := range frames {
-				var frame traceFrame
-				frame, bufp = traceFrameForPC(bufp, 0, f)
-				tmpbuf = traceAppend(tmpbuf, uint64(f.PC))
-				tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID))
-				tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID))
-				tmpbuf = traceAppend(tmpbuf, uint64(frame.line))
-			}
-			// Now copy to the buffer.
-			size := 1 + traceBytesPerNumber + len(tmpbuf)
-			if buf := bufp.ptr(); len(buf.arr)-buf.pos < size {
+			var frames []traceFrame
+			frames, bufp = traceFrames(bufp, stk.stack())
+
+			// Estimate the size of this record. This
+			// bound is pretty loose, but avoids counting
+			// lots of varint sizes.
+			maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber
+			// Make sure we have enough buffer space.
+			if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize {
 				bufp = traceFlush(bufp, 0)
 			}
+
+			// Emit header, with space reserved for length.
 			buf := bufp.ptr()
 			buf.byte(traceEvStack | 3<<traceArgCountShift)
-			buf.varint(uint64(len(tmpbuf)))
-			buf.pos += copy(buf.arr[buf.pos:], tmpbuf)
+			lenPos := buf.pos
+			buf.pos += traceBytesPerNumber
+
+			// Emit body.
+			recPos := buf.pos
+			buf.varint(uint64(stk.id))
+			buf.varint(uint64(len(frames)))
+			for _, frame := range frames {
+				buf.varint(uint64(frame.PC))
+				buf.varint(frame.funcID)
+				buf.varint(frame.fileID)
+				buf.varint(frame.line)
+			}
+
+			// Fill in size header.
+			buf.varintAt(lenPos, uint64(buf.pos-recPos))
 		}
 	}
 
@@ -1079,6 +1107,7 @@ func (tab *traceStackTable) dump() {
 }
 
 type traceFrame struct {
+	PC     uintptr
 	funcID uint64
 	fileID uint64
 	line   uint64
@@ -1089,6 +1118,7 @@ type traceFrame struct {
 func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
 	bufp := &buf
 	var frame traceFrame
+	frame.PC = f.PC
 
 	fn := f.Function
 	const maxLen = 1 << 10