Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gsym support #636

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ on:
push:
branches:
- master
- add-gsym-support
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be a part of this PR.

pull_request:
schedule:
- cron: '0 2 * * *' # Run every day, at 2AM UTC.
Expand Down
172 changes: 172 additions & 0 deletions internal/binutils/addr2liner_gsym.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright 2021 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package binutils

import (
"bufio"
"fmt"
"io"
"os/exec"
"regexp"
"strconv"
"strings"
"sync"

"github.com/google/pprof/internal/plugin"
)

// defaultLLVMGsymUtil is the command name used by newLLVMGsymUtil when the
// caller passes an empty command.
const defaultLLVMGsymUtil = "llvm-gsymutil"

var (
	// prefixRegex matches the part at the start of an output line that
	// precedes the frame text: either a hexadecimal "0x<addr>:" followed by
	// one whitespace character, or a run of leading whitespace.
	prefixRegex = regexp.MustCompile(`^(0x[[:xdigit:]]+:\s|\s+)`)

	// frameRegex matches output lines like:
	// _ZNK2sf12RefCountBaseILb0EE9removeRefEv + 3 @ /home/user/repo/x/../src/foo/Bar.hpp:67 [inlined]
	// Submatch 1 is the function name, 2 the source file and 3 the line number.
	frameRegex = regexp.MustCompile(`(\S+).* @ (.*):([[:digit:]]+)`)
)

// llvmGsymUtil is a connection to an llvm-gsymutil command for
// obtaining address and line number information from a binary.
type llvmGsymUtil struct {
// The embedded mutex is held by addrInfo for the whole duration of a
// lookup (writing the query and reading back its frames).
sync.Mutex
// filename is the binary's path; addrInfo appends ".gsym" to it to name
// the GSYM file passed to the command.
filename string
// rw is the line-oriented channel used to send queries to the command
// and read its responses.
rw lineReaderWriter
// base is subtracted from every address before it is sent to the command.
base uint64
}

// llvmGsymUtilJob bundles a command with the pipe used to write to its
// standard input and a buffered reader over its standard output.
type llvmGsymUtilJob struct {
	cmd *exec.Cmd
	in  io.WriteCloser
	out *bufio.Reader
}

// write sends s, followed by a newline, to the command's input.
func (j *llvmGsymUtilJob) write(s string) error {
	if _, err := fmt.Fprintln(j.in, s); err != nil {
		return err
	}
	return nil
}

// readLine reads the next newline-terminated line of output and returns it
// with surrounding whitespace removed. If the read fails (including reaching
// the end of the output) it returns an empty string and the error.
func (j *llvmGsymUtilJob) readLine() (string, error) {
	raw, err := j.out.ReadString('\n')
	if err == nil {
		return strings.TrimSpace(raw), nil
	}
	return "", err
}

// close releases any resources used by the llvmGsymUtilJob: it closes the
// command's input and then waits for the command. Errors from either step
// are ignored.
func (j *llvmGsymUtilJob) close() {
	j.in.Close()
	j.cmd.Wait()
}

// newLLVMGsymUtil launches cmd (defaultLLVMGsymUtil when cmd is empty) with
// --addresses-from-stdin and returns an llvmGsymUtil that talks to it over
// its standard input and output. file is the executable to report on; if it
// is a shared library, base should be the address at which it was mapped in
// the program under consideration. isData is accepted but not used here.
func newLLVMGsymUtil(cmd, file string, base uint64, isData bool) (*llvmGsymUtil, error) {
	if cmd == "" {
		cmd = defaultLLVMGsymUtil
	}

	proc := exec.Command(cmd, "--addresses-from-stdin")

	stdin, err := proc.StdinPipe()
	if err != nil {
		return nil, err
	}
	stdout, err := proc.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := proc.Start(); err != nil {
		return nil, err
	}

	job := &llvmGsymUtilJob{
		cmd: proc,
		in:  stdin,
		out: bufio.NewReader(stdout),
	}
	return &llvmGsymUtil{filename: file, rw: job, base: base}, nil
}

// readFrame reads and parses one line of llvm-gsymutil output for the
// address currently being looked up. It returns the parsed plugin.Frame and
// whether the end of the response for that address has been reached.
//
// The response ends at the first empty line or read error, or at an
// "error:" line (whose trailing empty line is consumed here). A non-empty
// line that is not recognized as a frame yields an empty plugin.Frame with
// end == false, so the caller keeps reading up to the terminating empty
// line. Reporting the end early would leave the remainder of the response
// unread and make every later lookup read stale output.
func (d *llvmGsymUtil) readFrame() (plugin.Frame, bool) {
	line, err := d.rw.readLine()
	if err != nil || len(line) == 0 {
		return plugin.Frame{}, true
	}

	// The first frame contains an address: prefix. We don't need that.
	suffix := prefixRegex.ReplaceAllString(line, "")

	if strings.HasPrefix(suffix, "error:") {
		// Skip empty line that follows.
		_, _ = d.rw.readLine()
		return plugin.Frame{}, true
	}

	frameMatch := frameRegex.FindStringSubmatch(suffix)
	if frameMatch == nil {
		// Not a "func @ file:line" line. Skip it, but do not signal the end:
		// the empty line terminating this response has not been read yet.
		// TODO handle cases where no source file/line is available
		return plugin.Frame{}, false
	}

	// TODO handle column number?

	funcname := frameMatch[1]
	sourceFile := frameMatch[2]

	// Keep 0 when the number cannot be converted (e.g. it overflows int).
	sourceLine := 0
	if n, err := strconv.Atoi(frameMatch[3]); err == nil {
		sourceLine = n
	}

	return plugin.Frame{Func: funcname, File: sourceFile, Line: sourceLine}, false
}

// addrInfo looks up the stack frames for a program address. It sends
// "0x<addr-base> <filename>.gsym" to the command and collects the frames of
// the response, skipping empty ones. The result is nil when no frame was
// found; an error is returned only if the query could not be written.
func (d *llvmGsymUtil) addrInfo(addr uint64) ([]plugin.Frame, error) {
	d.Lock()
	defer d.Unlock()

	query := fmt.Sprintf("0x%x %s.gsym", addr-d.base, d.filename)
	if err := d.rw.write(query); err != nil {
		return nil, err
	}

	var frames []plugin.Frame
	for frame, done := d.readFrame(); !done; frame, done = d.readFrame() {
		if frame == (plugin.Frame{}) {
			continue
		}
		frames = append(frames, frame)
	}

	return frames, nil
}
33 changes: 29 additions & 4 deletions internal/binutils/binutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,12 @@ type binrep struct {
objdump string
objdumpFound bool
isLLVMObjdump bool
llvmGsymUtil string
llvmGsymUtilFound bool

// if fast, perform symbolization using nm (symbol names only),
// instead of file-line detail from the slower addr2line.
// TODO update the comment and handling depending on whether llvm-gsymutil is as fast as nm
fast bool
}

Expand Down Expand Up @@ -98,7 +101,7 @@ func (bu *Binutils) update(fn func(r *binrep)) {
// String returns string representation of the binutils state for debug logging.
func (bu *Binutils) String() string {
r := bu.get()
var llvmSymbolizer, addr2line, nm, objdump string
var llvmSymbolizer, addr2line, nm, objdump, llvmGsymUtil string
if r.llvmSymbolizerFound {
llvmSymbolizer = r.llvmSymbolizer
}
Expand All @@ -111,13 +114,17 @@ func (bu *Binutils) String() string {
if r.objdumpFound {
objdump = r.objdump
}
return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
llvmSymbolizer, addr2line, nm, objdump, r.fast)
if r.llvmGsymUtilFound {
llvmGsymUtil = r.llvmGsymUtil
}
return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q llvmGsymUtil=%q fast=%t",
llvmSymbolizer, addr2line, nm, objdump, llvmGsymUtil, r.fast)
}

// SetFastSymbolization turns fast symbolization on or off. Fast
// symbolization uses nm, which is much faster than addr2line but
// provides only symbol name information (no file/line).
// TODO update the comment and handling depending on whether llvm-gsymutil is as fast as nm
func (bu *Binutils) SetFastSymbolization(fast bool) {
	setFast := func(rep *binrep) { rep.fast = fast }
	bu.update(setFast)
}
Expand Down Expand Up @@ -147,9 +154,11 @@ func initTools(b *binrep, config string) {
b.addr2line, b.addr2lineFound = chooseExe([]string{"addr2line"}, []string{"gaddr2line"}, append(paths["addr2line"], defaultPath...))
// The "-n" option is supported by LLVM since 2011. The output of llvm-nm
// and GNU nm with "-n" option is interchangeable for our purposes, so we do
// not need to differrentiate them.
// not need to differentiate them.
b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, append(paths["nm"], defaultPath...))
b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
b.llvmGsymUtil, b.llvmGsymUtilFound = chooseExe([]string{"llvm-gsymutil"}, []string{}, append(paths["llvm-gsymutil"], defaultPath...))
// TODO check if llvm-gsymutil is recent enough to support --addresses-from-stdin
}

// findObjdump finds and returns path to preferred objdump binary.
Expand Down Expand Up @@ -681,6 +690,7 @@ type fileAddr2Line struct {
file
addr2liner *addr2Liner
llvmSymbolizer *llvmSymbolizer
llvmGsymUtil *llvmGsymUtil
isData bool
}

Expand All @@ -690,6 +700,9 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
return nil, f.baseErr
}
f.once.Do(f.init)
if f.llvmGsymUtil != nil {
return f.llvmGsymUtil.addrInfo(addr)
}
if f.llvmSymbolizer != nil {
return f.llvmSymbolizer.addrInfo(addr)
}
Expand All @@ -700,6 +713,13 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
}

func (f *fileAddr2Line) init() {
if _, err := os.Stat(f.name + ".gsym"); err == nil {
if llvmGsymUtil, err := newLLVMGsymUtil(f.b.llvmGsymUtil, f.name, f.base, f.isData); err == nil {
f.llvmGsymUtil = llvmGsymUtil
return
}
}

if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base, f.isData); err == nil {
f.llvmSymbolizer = llvmSymbolizer
return
Expand All @@ -711,13 +731,18 @@ func (f *fileAddr2Line) init() {
// When addr2line encounters some gcc compiled binaries, it
// drops interesting parts of names in anonymous namespaces.
// Fallback to NM for better function names.
// This seems to have been fixed in binutils 2.26 though, see
// https://sourceware.org/bugzilla/show_bug.cgi?id=17541
if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
f.addr2liner.nm = nm
}
}
}

func (f *fileAddr2Line) Close() error {
if f.llvmGsymUtil != nil {
f.llvmGsymUtil = nil
}
if f.llvmSymbolizer != nil {
f.llvmSymbolizer.rw.close()
f.llvmSymbolizer = nil
Expand Down
Binary file added internal/binutils/testdata/exe_linux_64.gsym
Binary file not shown.
2 changes: 1 addition & 1 deletion internal/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ type Inst struct {

// An ObjFile is a single object file: a shared library or executable.
type ObjFile interface {
// Name returns the underlyinf file name, if available
// Name returns the underlying file name, if available
Name() string

// ObjAddr returns the objdump (linker) address corresponding to a runtime
Expand Down
5 changes: 5 additions & 0 deletions internal/symbolizer/symbolizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ func doLocalSymbolize(prof *profile.Profile, fast, force bool, obj plugin.ObjToo

stack, err := segment.SourceLine(l.Address)
if err != nil || len(stack) == 0 {

if err != nil {
fmt.Println(err.Error())
}

// No answers from addr2line.
continue
}
Expand Down