
Commit 342d90c

runtime/pprof: add debug=4 goroutine profile with labels and reduced STW
This adds two new goroutine profile modes, debug=3 and debug=4, that emit in the same output formats as debug=0 and debug=1 but contain one entry per goroutine, along with extended per-goroutine information such as goroutine ID, creator ID, state, wait minutes, and creation location. Previously, debug=2 was the only way to get this per-goroutine information; however, that mode has a significantly different underlying implementation, which means it (a) requires a potentially lengthy STW pause and (b) does not include labels. These new modes make the detailed per-goroutine information previously available only in debug=2 available in a format that also includes labels and does not incur the same duration of stop-the-world pause during collection.

The difference in latency observed by running goroutines is demonstrated by the included benchmark:

                        │    debug=2     │               debug=4                │
                        │ max_latency_ns │ max_latency_ns   vs base             │
goroutines=100x3-14        1013.17k ± 47%     84.06k ±   27%  -91.70% (p=0.002 n=6)
goroutines=100x10-14        769.23k ±  7%     80.29k ±   22%  -89.56% (p=0.002 n=6)
goroutines=100x50-14        2172.4k ±  9%     181.8k ±   46%  -91.63% (p=0.002 n=6)
goroutines=1000x3-14        7133.9k ±  3%     195.7k ±   42%  -97.26% (p=0.002 n=6)
goroutines=1000x10-14      11787.6k ± 48%     494.4k ±   77%  -95.81% (p=0.002 n=6)
goroutines=1000x50-14      20234.0k ± 87%     174.8k ±  137%  -99.14% (p=0.002 n=6)
goroutines=10000x3-14      68611.0k ± 49%     168.5k ± 2768%  -99.75% (p=0.002 n=6)
goroutines=10000x10-14      60.261M ± 95%     3.460M ±  166%  -94.26% (p=0.002 n=6)
goroutines=10000x50-14     284.144M ± 40%     4.672M ±   89%  -98.36% (p=0.002 n=6)
goroutines=25000x3-14      171.290M ± 48%     4.287M ±  394%  -97.50% (p=0.002 n=6)
goroutines=25000x10-14     150.827M ± 92%     6.424M ±  158%  -95.74% (p=0.002 n=6)
goroutines=25000x50-14     708.238M ± 34%     2.249M ±  410%  -99.68% (p=0.002 n=6)
geomean                      25.08M            624.2k         -97.51%

debug=4 is added as a new format rather than by altering debug=2 in place to use the lower-latency collection method, because the concurrent collection method would require behavior changes to debug=2 that are likely too significant to make in place: chiefly, debug=2 includes argument values in its output, which are not collected by the concurrent collection method and are not included in any other format, and adding labels to the debug=2 format could break existing parsers.
1 parent d83b16f commit 342d90c
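A minimal usage sketch, assuming a toolchain that includes this change: the debug argument of pprof.Profile.WriteTo selects the mode, with debug=3 mirroring the debug=0 proto output and debug=4 mirroring the debug=1 text output, each with one entry per goroutine plus labels.

package main

import (
	"os"
	"runtime/pprof"
)

func main() {
	// Write the goroutine profile in the new debug=4 mode: text output in
	// the debug=1 style, but with one entry per goroutine, including labels,
	// goroutine ID, creator ID, state, wait time, and creation location,
	// collected without the lengthy stop-the-world pause of debug=2.
	p := pprof.Lookup("goroutine")
	if err := p.WriteTo(os.Stdout, 4); err != nil {
		panic(err)
	}
}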

8 files changed: +581, -153 lines changed

src/internal/profilerecord/profilerecord.go

Lines changed: 21 additions & 0 deletions
@@ -8,10 +8,16 @@
 // TODO: Consider moving this to internal/runtime, see golang.org/issue/65355.
 package profilerecord
 
+import "unsafe"
+
 type StackRecord struct {
 	Stack []uintptr
 }
 
+func (r StackRecord) GetStack() []uintptr { return r.Stack }
+func (r StackRecord) GetLabels() unsafe.Pointer { return nil }
+func (r StackRecord) GetGoroutine() GoroutineRecord { return GoroutineRecord{} }
+
 type MemProfileRecord struct {
 	AllocBytes, FreeBytes     int64
 	AllocObjects, FreeObjects int64
@@ -26,3 +32,18 @@ type BlockProfileRecord struct {
 	Cycles int64
 	Stack  []uintptr
 }
+
+type GoroutineRecord struct {
+	ID         uint64
+	State      uint32
+	WaitReason uint8
+	CreatorID  uint64
+	CreationPC uintptr
+	WaitSince  int64 // approx time when the g became blocked, in nanoseconds
+	Labels     unsafe.Pointer
+	Stack      []uintptr
+}
+
+func (r GoroutineRecord) GetStack() []uintptr { return r.Stack }
+func (r GoroutineRecord) GetLabels() unsafe.Pointer { return r.Labels }
+func (r GoroutineRecord) GetGoroutine() GoroutineRecord { return r }

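The matching GetStack, GetLabels, and GetGoroutine accessors on StackRecord and GoroutineRecord suggest that callers can process both record types through one generic code path. A self-contained sketch of that pattern using local stand-in types; the record constraint and dumpStacks helper below are hypothetical, not part of this change.

package main

import (
	"fmt"
	"unsafe"
)

// Local stand-ins for the profilerecord types, for illustration only.
type GoroutineRecord struct {
	ID     uint64
	Labels unsafe.Pointer
	Stack  []uintptr
}

type StackRecord struct{ Stack []uintptr }

func (r StackRecord) GetStack() []uintptr            { return r.Stack }
func (r StackRecord) GetLabels() unsafe.Pointer      { return nil }
func (r StackRecord) GetGoroutine() GoroutineRecord  { return GoroutineRecord{} }

func (r GoroutineRecord) GetStack() []uintptr           { return r.Stack }
func (r GoroutineRecord) GetLabels() unsafe.Pointer     { return r.Labels }
func (r GoroutineRecord) GetGoroutine() GoroutineRecord { return r }

// record is a hypothetical constraint satisfied by both record types.
type record interface {
	GetStack() []uintptr
	GetLabels() unsafe.Pointer
	GetGoroutine() GoroutineRecord
}

// dumpStacks prints one line per record, regardless of the concrete type.
func dumpStacks[R record](rs []R) {
	for _, r := range rs {
		fmt.Printf("goroutine %d: %d frames\n", r.GetGoroutine().ID, len(r.GetStack()))
	}
}

func main() {
	dumpStacks([]StackRecord{{Stack: []uintptr{0x1, 0x2}}})
	dumpStacks([]GoroutineRecord{{ID: 7, Stack: []uintptr{0x3}}})
}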
src/runtime/mprof.go

Lines changed: 24 additions & 98 deletions
@@ -1246,25 +1246,15 @@ func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool)
 }
 
 //go:linkname pprof_goroutineProfileWithLabels
-func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
-	return goroutineProfileWithLabels(p, labels)
-}
-
-// labels may be nil. If labels is non-nil, it must have the same length as p.
-func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
-	if labels != nil && len(labels) != len(p) {
-		labels = nil
-	}
-
-	return goroutineProfileWithLabelsConcurrent(p, labels)
+func pprof_goroutineProfileWithLabels(p []profilerecord.GoroutineRecord) (n int, ok bool) {
+	return goroutineProfileInternal(p)
 }
 
 var goroutineProfile = struct {
 	sema    uint32
 	active  bool
 	offset  atomic.Int64
-	records []profilerecord.StackRecord
-	labels  []unsafe.Pointer
+	records []profilerecord.GoroutineRecord
 }{
 	sema: 1,
 }
@@ -1302,7 +1292,9 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt
 	return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
 }
 
-func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+func goroutineProfileInternal(
+	p []profilerecord.GoroutineRecord,
+) (n int, ok bool) {
 	if len(p) == 0 {
 		// An empty slice is obviously too small. Return a rough
 		// allocation estimate without bothering to STW. As long as
@@ -1345,9 +1337,15 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
 	systemstack(func() {
 		saveg(pc, sp, ourg, &p[0], pcbuf)
 	})
-	if labels != nil {
-		labels[0] = ourg.labels
-	}
+
+	p[0].ID = ourg.goid
+	p[0].CreatorID = ourg.parentGoid
+	p[0].CreationPC = ourg.gopc
+	p[0].Labels = ourg.labels
+	p[0].State = readgstatus(ourg) &^ _Gscan
+	p[0].WaitReason = uint8(ourg.waitreason)
+	p[0].WaitSince = ourg.waitsince
+
 	ourg.goroutineProfiled.Store(goroutineProfileSatisfied)
 	goroutineProfile.offset.Store(1)
 
@@ -1358,7 +1356,6 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
 	// field set to goroutineProfileSatisfied.
 	goroutineProfile.active = true
 	goroutineProfile.records = p
-	goroutineProfile.labels = labels
 	startTheWorld(stw)
 
 	// Visit each goroutine that existed as of the startTheWorld call above.
@@ -1380,7 +1377,6 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
 	endOffset := goroutineProfile.offset.Swap(0)
 	goroutineProfile.active = false
 	goroutineProfile.records = nil
-	goroutineProfile.labels = nil
 	startTheWorld(stw)
 
 	// Restore the invariant that every goroutine struct in allgs has its
@@ -1503,79 +1499,13 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
 	// to avoid schedule delays.
 	systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) })
 
-	if goroutineProfile.labels != nil {
-		goroutineProfile.labels[offset] = gp1.labels
-	}
-}
-
-func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
-	gp := getg()
-
-	isOK := func(gp1 *g) bool {
-		// Checking isSystemGoroutine here makes GoroutineProfile
-		// consistent with both NumGoroutine and Stack.
-		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
-	}
-
-	pcbuf := makeProfStack() // see saveg() for explanation
-	stw := stopTheWorld(stwGoroutineProfile)
-
-	// World is stopped, no locking required.
-	n = 1
-	forEachGRace(func(gp1 *g) {
-		if isOK(gp1) {
-			n++
-		}
-	})
-
-	if n <= len(p) {
-		ok = true
-		r, lbl := p, labels
-
-		// Save current goroutine.
-		sp := sys.GetCallerSP()
-		pc := sys.GetCallerPC()
-		systemstack(func() {
-			saveg(pc, sp, gp, &r[0], pcbuf)
-		})
-		r = r[1:]
-
-		// If we have a place to put our goroutine labelmap, insert it there.
-		if labels != nil {
-			lbl[0] = gp.labels
-			lbl = lbl[1:]
-		}
-
-		// Save other goroutines.
-		forEachGRace(func(gp1 *g) {
-			if !isOK(gp1) {
-				return
-			}
-
-			if len(r) == 0 {
-				// Should be impossible, but better to return a
-				// truncated profile than to crash the entire process.
-				return
-			}
-			// saveg calls gentraceback, which may call cgo traceback functions.
-			// The world is stopped, so it cannot use cgocall (which will be
-			// blocked at exitsyscall). Do it on the system stack so it won't
-			// call into the schedular (see traceback.go:cgoContextPCs).
-			systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0], pcbuf) })
-			if labels != nil {
-				lbl[0] = gp1.labels
-				lbl = lbl[1:]
-			}
-			r = r[1:]
-		})
-	}
-
-	if raceenabled {
-		raceacquire(unsafe.Pointer(&labelSync))
-	}
-
-	startTheWorld(stw)
-	return n, ok
+	goroutineProfile.records[offset].Labels = gp1.labels
+	goroutineProfile.records[offset].ID = gp1.goid
+	goroutineProfile.records[offset].CreatorID = gp1.parentGoid
+	goroutineProfile.records[offset].CreationPC = gp1.gopc
+	goroutineProfile.records[offset].State = readgstatus(gp1) &^ _Gscan
+	goroutineProfile.records[offset].WaitReason = uint8(gp1.waitreason)
+	goroutineProfile.records[offset].WaitSince = gp1.waitsince
 }
 
 // GoroutineProfile returns n, the number of records in the active goroutine stack profile.
@@ -1585,7 +1515,7 @@ func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsa
 // Most clients should use the [runtime/pprof] package instead
 // of calling GoroutineProfile directly.
 func GoroutineProfile(p []StackRecord) (n int, ok bool) {
-	records := make([]profilerecord.StackRecord, len(p))
+	records := make([]profilerecord.GoroutineRecord, len(p))
 	n, ok = goroutineProfileInternal(records)
 	if !ok {
 		return
@@ -1597,11 +1527,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) {
 	return
 }
 
-func goroutineProfileInternal(p []profilerecord.StackRecord) (n int, ok bool) {
-	return goroutineProfileWithLabels(p, nil)
-}
-
-func saveg(pc, sp uintptr, gp *g, r *profilerecord.StackRecord, pcbuf []uintptr) {
+func saveg(pc, sp uintptr, gp *g, r *profilerecord.GoroutineRecord, pcbuf []uintptr) {
 	// To reduce memory usage, we want to allocate a r.Stack that is just big
 	// enough to hold gp's stack trace. Naively we might achieve this by
 	// recording our stack trace into mp.profStack, and then allocating a

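The WaitSince field recorded above holds the approximate time, in nanoseconds on the runtime's own monotonic clock, at which a goroutine became blocked. A small arithmetic sketch (the waitMinutes helper is hypothetical, and wall-clock nanoseconds stand in for the runtime clock) of how a consumer might convert it into the whole-minute wait durations that the debug=2 text format prints:

package main

import (
	"fmt"
	"time"
)

// waitMinutes converts a WaitSince timestamp (nanoseconds, 0 if unknown)
// into whole minutes of waiting, given the current time in nanoseconds
// on the same clock.
func waitMinutes(waitSinceNS, nowNS int64) int64 {
	if waitSinceNS == 0 || nowNS <= waitSinceNS {
		return 0
	}
	return (nowNS - waitSinceNS) / int64(time.Minute)
}

func main() {
	now := time.Now().UnixNano()
	blocked := now - 3*int64(time.Minute) // pretend the goroutine blocked 3 minutes ago
	fmt.Println(waitMinutes(blocked, now)) // prints 3
}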
src/runtime/pprof/label.go

Lines changed: 19 additions & 4 deletions
@@ -42,13 +42,28 @@ type labelMap struct {
 // String satisfies Stringer and returns key, value pairs in a consistent
 // order.
 func (l *labelMap) String() string {
-	if l == nil {
+	return labelsString(l)
+}
+
+func labelsString(l *labelMap, extra ...string) string {
+	if l == nil && len(extra) == 0 {
 		return ""
 	}
-	keyVals := make([]string, 0, len(l.list))
 
-	for _, lbl := range l.list {
-		keyVals = append(keyVals, fmt.Sprintf("%q:%q", lbl.key, lbl.value))
+	length := len(extra)
+	if l != nil {
+		length += len(l.list)
+	}
+	keyVals := make([]string, 0, length)
+
+	if l != nil {
+		for _, lbl := range l.list {
+			keyVals = append(keyVals, fmt.Sprintf("%q:%q", lbl.key, lbl.value))
+		}
+	}
+
+	for i := 0; i < len(extra)-1; i += 2 {
+		keyVals = append(keyVals, fmt.Sprintf("%q:%q", extra[i], extra[i+1]))
 	}
 
 	slices.Sort(keyVals)

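The new labelsString helper renders user labels and any extra key/value pairs identically and sorts them together, which is what lets per-goroutine metadata appear alongside labels in the new output. A standalone sketch of that formatting behavior, using a plain map in place of the unexported labelMap; the formatPairs name and the final join are illustrative, since the hunk above ends before the function's return:

package main

import (
	"fmt"
	"slices"
	"strings"
)

// formatPairs mimics labelsString's pair rendering: each pair becomes
// "key":"value", extra key/value pairs are appended, and the combined
// list is sorted so output order is deterministic.
func formatPairs(labels map[string]string, extra ...string) string {
	keyVals := make([]string, 0, len(labels)+len(extra)/2)
	for k, v := range labels {
		keyVals = append(keyVals, fmt.Sprintf("%q:%q", k, v))
	}
	for i := 0; i < len(extra)-1; i += 2 {
		keyVals = append(keyVals, fmt.Sprintf("%q:%q", extra[i], extra[i+1]))
	}
	slices.Sort(keyVals)
	return strings.Join(keyVals, ", ")
}

func main() {
	fmt.Println(formatPairs(map[string]string{"worker": "db"}, "state", "running"))
	// Output (sorted): "state":"running", "worker":"db"
}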