Skip to content

runtime/pprof: change in behavior between Go 1.13 and 1.14 #36874

Closed
@tbpg

Description

@tbpg

What version of Go are you using (go version)?

$ go version
go version devel +1319bb9 Wed Jan 29 14:58:15 2020 +0000 linux/amd64

Does this issue reproduce with the latest release?

Yes.

What operating system and processor architecture are you using (go env)?

go env Output
$ go env
GO111MODULE=""
GOARCH="amd64"
GOBIN=""
GOCACHE="/usr/local/google/home/tbp/.cache/go-build"
GOENV="/usr/local/google/home/tbp/.config/go/env"
GOEXE=""
GOFLAGS=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOINSECURE=""
GONOPROXY=""
GONOSUMDB=""
GOOS="linux"
GOPATH="/usr/local/google/home/tbp/go"
GOPRIVATE=""
GOPROXY="https://proxy.golang.org"
GOROOT="/usr/local/google/home/tbp/sdk/gotip"
GOSUMDB="sum.golang.org"
GOTMPDIR=""
GOTOOLDIR="/usr/local/google/home/tbp/sdk/gotip/pkg/tool/linux_amd64"
GCCGO="gccgo"
AR="ar"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
GOMOD="/usr/local/google/home/tbp/go/src/cloud.google.com/go/go.mod"
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build554794736=/tmp/go-build -gno-record-gcc-switches"
GOROOT/bin/go version: go version devel +1319bb9 Wed Jan 29 14:58:15 2020 +0000 linux/amd64
GOROOT/bin/go tool compile -V: compile version devel +1319bb9 Wed Jan 29 14:58:15 2020 +0000
uname -sr: Linux 5.2.17-1rodete3-amd64
Distributor ID:	Debian
Description:	Debian GNU/Linux rodete
Release:	rodete
Codename:	rodete
/lib/x86_64-linux-gnu/libc.so.6: GNU C Library (Debian GLIBC 2.28-10) stable release version 2.28.
gdb --version: GNU gdb (GDB) 8.3-gg5

What did you do?

@eclipseo originally reported this at googleapis/google-cloud-go#1761. A test started failing for 1.14beta1 that wasn't failing for Go 1.13.

I copied the test case into a standalone test below. The symbolize call is what is causing problems:

	// The mutex profile is not symbolized by runtime.pprof until
	// golang.org/issue/21474 is fixed in go1.10.
	symbolize(p)

If you comment out/delete the call to symbolize, the test passes. golang.org/issue/21474 is marked as fixed in Go 1.10.

We (cloud.google.com/go) are OK to remove symbolize since we no longer need to support Go < 1.10. But, I'm wondering if this is an expected behavior change of Go.

Test case that passes on Go 1.13 and fails on Go 1.14beta1 (weird formatting)
	package profiletest
import (
	"bytes"
	"context"
	"errors"
	"runtime"
	"runtime/pprof"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/google/pprof/profile"
	gax "github.com/googleapis/gax-go/v2"
)

func TestDeltaMutexProfile(t *testing.T) {
	runtime.SetMutexProfileFraction(100)
	oldMaxProcs := runtime.GOMAXPROCS(10)
	defer func() {
		runtime.GOMAXPROCS(oldMaxProcs)
	}()
	
	hog(time.Second, mutexHog)
	go func() {
		hog(2*time.Second, backgroundHog)
	}()
	
	var prof bytes.Buffer
	if err := deltaMutexProfile(context.Background(), time.Second, &prof); err != nil {
		t.Fatalf("deltaMutexProfile() got error: %v", err)
	}
	p, err := profile.Parse(&prof)
	if err != nil {
		t.Fatalf("profile.Parse() got error: %v", err)
	}

	if s := sum(p, "mutexHog"); s != 0 {
		t.Errorf("mutexHog found in the delta mutex profile (sum=%d):\n%s", s, p)
	}
	if s := sum(p, "backgroundHog"); s <= 0 {
		t.Errorf("backgroundHog not in the delta mutex profile (sum=%d):\n%s", s, p)
	}
}

// deltaMutexProfile writes mutex profile changes over a time period specified
// with 'duration' to 'prof'.
func deltaMutexProfile(ctx context.Context, duration time.Duration, prof *bytes.Buffer) error {
	p0, err := mutexProfile()
	if err != nil {
		return err
	}
	gax.Sleep(ctx, duration)
	p, err := mutexProfile()
	if err != nil {
		return err
	}

	p0.Scale(-1)
	p, err = profile.Merge([]*profile.Profile{p0, p})
	if err != nil {
		return err
	}

	// The mutex profile is not symbolized by runtime.pprof until
	// golang.org/issue/21474 is fixed in go1.10.
	symbolize(p)
	return p.Write(prof)
}

func mutexHog(mu1, mu2 *sync.Mutex, start time.Time, dt time.Duration) {
	for time.Since(start) < dt {
		mu1.Lock()
		runtime.Gosched()
		mu2.Lock()
		mu1.Unlock()
		mu2.Unlock()
	}
}

// backgroundHog is identical to mutexHog. We keep them separate
// in order to distinguish them with function names in the stack trace.
func backgroundHog(mu1, mu2 *sync.Mutex, start time.Time, dt time.Duration) {
	for time.Since(start) < dt {
		mu1.Lock()
		runtime.Gosched()
		mu2.Lock()
		mu1.Unlock()
		mu2.Unlock()
	}
}

func hog(dt time.Duration, hogger func(mu1, mu2 *sync.Mutex, start time.Time, dt time.Duration)) {
	start := time.Now()
	mu1 := new(sync.Mutex)
	mu2 := new(sync.Mutex)
	var wg sync.WaitGroup
	wg.Add(10)
	for i := 0; i < 10; i++ {
		go func() {
			defer wg.Done()
			hogger(mu1, mu2, start, dt)
		}()
	}
	wg.Wait()
}

func mutexProfile() (*profile.Profile, error) {
	p := pprof.Lookup("mutex")
	if p == nil {
		return nil, errors.New("mutex profiling is not supported")
	}
	var buf bytes.Buffer
	if err := p.WriteTo(&buf, 0); err != nil {
		return nil, err
	}
	return profile.Parse(&buf)
}

// sum returns the sum of all mutex counts from the samples whose
// stacks include the specified function name.
func sum(p *profile.Profile, fname string) int64 {
	locIDs := map[*profile.Location]bool{}
	for _, loc := range p.Location {
		for _, l := range loc.Line {
			if strings.Contains(l.Function.Name, fname) {
				locIDs[loc] = true
				break
			}
		}
	}
	var s int64
	for _, sample := range p.Sample {
		for _, loc := range sample.Location {
			if locIDs[loc] {
				s += sample.Value[0]
				break
			}
		}
	}
	return s
}

type profileFunctionMap map[profile.Function]*profile.Function

func symbolize(p *profile.Profile) {
	fns := profileFunctionMap{}
	for _, l := range p.Location {
		pc := uintptr(l.Address)
		f := funcForPC(pc)
		if f == nil {
			continue
		}
		file, lineno := f.FileLine(pc)
		l.Line = []profile.Line{
			{
				Function: fns.findOrAddFunction(f.Name(), file, p),
				Line:     int64(lineno),
			},
		}
	}
	// Trim runtime functions. Always hide runtime.goexit. Other runtime
	// functions are only hidden for heap profile when they appear at the beginning.
	isHeapProfile := p.PeriodType != nil && p.PeriodType.Type == "space"
	for _, s := range p.Sample {
		show := !isHeapProfile
		var i int
		for _, l := range s.Location {
			if len(l.Line) > 0 && l.Line[0].Function != nil {
				name := l.Line[0].Function.Name
				if name == "runtime.goexit" || !show && strings.HasPrefix(name, "runtime.") {
					continue
				}
			}
			show = true
			s.Location[i] = l
			i++
		}
		// If all locations of a sample are trimmed, keep the root location.
		if i == 0 && len(s.Location) > 0 {
			s.Location[0] = s.Location[len(s.Location)-1]
			i = 1
		}
		s.Location = s.Location[:i]
	}
}

type function interface {
	Name() string
	FileLine(pc uintptr) (string, int)
}

// funcForPC is a wrapper for runtime.FuncForPC. Defined as var for testing.
var funcForPC = func(pc uintptr) function {
	return runtime.FuncForPC(pc)
}

func (fns profileFunctionMap) findOrAddFunction(name, filename string, p *profile.Profile) *profile.Function {
	f := profile.Function{
		Name:       name,
		SystemName: name,
		Filename:   filename,
	}
	if fp := fns[f]; fp != nil {
		return fp
	}
	fp := new(profile.Function)
	fns[f] = fp

	*fp = f
	fp.ID = uint64(len(p.Function) + 1)
	p.Function = append(p.Function, fp)
	return fp
}

What did you expect to see?

No changes in behavior.

What did you see instead?

A change of behavior of the symbolize function above between Go 1.13 and Go 1.14beta1.

cc @dmitshur

Metadata

Metadata

Assignees

No one assigned

    Labels

    FrozenDueToAgeNeedsInvestigationSomeone must examine and confirm this is a valid issue and not a duplicate of an existing one.

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions