From d1831734a1117c10bbbe821389bdba6700f2e745 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Fri, 19 Feb 2021 16:48:39 +0100 Subject: [PATCH] [dnm]: use background profiling Background profiling is an experimental patch that was floated as a way to get granular CPU usage information in https://github.com/golang/go/issues/23458. I decided to dust off the Golang patch that implements this (https://go-review.googlesource.com/c/go/+/102755) and see for myself what you actually get. The output is something like this: ``` bgprof_test.go:51: ts: 12h37m15.540022251s labels: map[foo:bar] fn: hash/crc32.ieeeCLMUL bgprof_test.go:51: ts: 12h37m15.54801909s labels: map[foo:bar] fn: hash/crc32.ieeeCLMUL bgprof_test.go:51: ts: 12h37m15.560015269s labels: map[foo:bar] fn: hash/crc32.ieeeCLMUL bgprof_test.go:51: ts: 12h37m15.568026639s labels: map[foo:bar] fn: hash/crc32.ieeeCLMUL bgprof_test.go:51: ts: 12h37m15.580029608s labels: map[foo:bar] fn: hash/crc32.ieeeCLMUL ... ``` If we used profiler labels to identify queries (similar to what's shown in https://github.com/cockroachdb/cockroach/pull/60508) I guess I can see how you can somehow build a profile from this stream and then use the tag breakdown of the profile to reason about CPU allocated to each query. It seems wildly experimental and also unergonomical, though. It seems like we'd get maybe roughly the same, without as much bending over backwards and wildly experimental bleeding edge, by relying on periodic foreground sampling at a lower frequency (`runtime.SetCPUProfileRate`) than the default 100hz. To run this PR (which you shouldn't need to do for anything, but still) you'd clone `cockroachdb/go`, check out the [bgprof] branch, run `./make.bash` in `src` and change your PATH so that `$(which go)` is `bin/go`. Release note: None --- pkg/server/bgprof_test.go | 94 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 pkg/server/bgprof_test.go diff --git a/pkg/server/bgprof_test.go b/pkg/server/bgprof_test.go new file mode 100644 index 000000000000..5adbeb6476cb --- /dev/null +++ b/pkg/server/bgprof_test.go @@ -0,0 +1,94 @@ +package server_test + +import ( + "context" + "hash/crc32" + "runtime" + runtimepprof "runtime/pprof" + "testing" + "time" + "unsafe" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + stop "github.com/cockroachdb/cockroach/pkg/util/stop" +) + +func TestBackgroundProfiling(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + //tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{}) + //defer tc.Stopper().Stop(ctx) + s := stop.NewStopper() + defer s.Stop(ctx) + + done := make(chan struct{}) + s.RunAsyncTask(ctx, "spin", func(ctx context.Context) { + runtimepprof.Do(ctx, runtimepprof.Labels("foo", "bar"), func(ctx context.Context) { + buf := make([]byte, 1024) + for { + select { + case <-done: + return + default: + } + for i := 0; i < 1000; i++ { + crc32.Update(0, crc32.IEEETable, buf) + } + } + }) + }) + + runtime.EnableProfCPU(100) + time.Sleep(time.Second) + runtime.EnableProfCPU(0) + close(done) + n := 0 + readProfile(t, runtime.ReadProfCPU, func(d time.Duration, frames *runtime.Frames, labels map[string]string) { + n++ + if n < 100 { + fr, _ := frames.Next() + t.Logf("ts: %s\nlabels: %s\nfn: %s", d, labels, fr.Function) + } + return + }) +} + +func readProfile( + t *testing.T, + read func() ([]uint64, []unsafe.Pointer, bool), + f func(d time.Duration, frames *runtime.Frames, labels map[string]string), +) { + var us []uintptr + for { + data, tags, eof := read() + if eof { + break + } + for len(data) > 0 { + n := int(data[0]) + if n < 3 || n > len(data) { + t.Errorf("bad record: n=%d, len(data)=%d", n, len(data)) + break + } + _ = runtime.ReadProfCPU + if count := data[2]; count != 0 { // 1 or num_samples or overflow (0) + var labelMap map[string]string + if tags[0] != nil { + labelMap = *(*map[string]string)(tags[0]) + } + + d := time.Duration(data[1]) // time since process start + for _, n := range data[3:n] { + us = append(us, uintptr(n)) + } + frames := runtime.CallersFrames(us) + us = us[:0] + f(d, frames, labelMap) + } // else: overflow record + + data = data[n:] + tags = tags[1:] + } + } +}