diff --git a/.dockerignore b/.dockerignore index f49ff5b7a1..55435ca03f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -11,3 +11,10 @@ benchmark/docker-compose.yml benchmark/grafana-* benchmark/node_modules benchmark/*.env + +third_party/bcc/lib +third_party/bcc/src +third_party/libbpf/lib +third_party/libbpf/src +pkg/agent/ebpfspy/bpf/profile.bpf.o +pkg/agent/ebpfspy/bpf/vmlinux.h diff --git a/.github/workflows/lint-go.yml b/.github/workflows/lint-go.yml index 0380d9c34e..db9ceb4c83 100644 --- a/.github/workflows/lint-go.yml +++ b/.github/workflows/lint-go.yml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Run revive action - uses: petethepig/revive-action@v5 + uses: korniltsev/revive-action@v6 with: config: revive.toml # same as in the `lint` rule of Makefile diff --git a/Dockerfile b/Dockerfile index e512209453..6d6f12414d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,6 +68,25 @@ ARG EXTRA_METADATA="" RUN EXTRA_METADATA=$EXTRA_METADATA make assets-release + + +# _ __ +# | | / _| +# ___| |__ _ __ | |_ +# / _ \ '_ \| '_ \| _| +# | __/ |_) | |_) | | +# \___|_.__/| .__/|_| +# | | +# |_| +FROM alpine:3.12 as ebpf-builder +RUN apk add cmake make binutils gcc g++ clang musl-dev linux-headers zlib-dev elfutils-dev libelf-static zlib-static git openssh +ADD third_party/libbpf/Makefile /build/libbpf/ +RUN make -C /build/libbpf/ +ADD third_party/bcc/Makefile /build/bcc/ +RUN make -C /build/bcc/ +ADD pkg/agent/ebpfspy/bpf/Makefile pkg/agent/ebpfspy/bpf/profile.bpf.c pkg/agent/ebpfspy/bpf/profile.bpf.h /build/profile.bpf/ +RUN CFLAGS=-I/build/libbpf/lib/include make -C /build/profile.bpf + # _ # | | # __ _ ___ | | __ _ _ __ __ _ @@ -84,8 +103,8 @@ RUN EXTRA_METADATA=$EXTRA_METADATA make assets-release # see https://github.com/pyroscope-io/pyroscope/pull/372 for more context FROM pyroscope/golang:1.18.0-alpine3.12 AS go-builder - -RUN apk add --no-cache make git zstd gcc g++ libc-dev musl-dev bash +RUN apk add --no-cache make git zstd gcc g++ libc-dev musl-dev bash zlib-dev elfutils-dev libelf-static zlib-static \ + linux-headers RUN apk upgrade binutils RUN apk upgrade elfutils @@ -98,6 +117,9 @@ COPY third_party/rustdeps/pyspy.h /opt/pyroscope/third_party/rustdeps/pyspy.h COPY third_party/phpspy/phpspy.h /opt/pyroscope/third_party/phpspy/phpspy.h COPY --from=phpspy-builder /var/www/html/third_party/phpspy/libphpspy.a /opt/pyroscope/third_party/phpspy/libphpspy.a COPY --from=js-builder /opt/pyroscope/webapp/public ./webapp/public +COPY --from=ebpf-builder /build/bcc/lib third_party/bcc/lib +COPY --from=ebpf-builder /build/libbpf/lib third_party/libbpf/lib +COPY --from=ebpf-builder /build/profile.bpf/profile.bpf.o pkg/agent/ebpfspy/bpf/profile.bpf.o COPY Makefile ./ COPY tools ./tools COPY go.mod go.sum ./ @@ -159,7 +181,7 @@ LABEL maintainer="Pyroscope team " WORKDIR /var/lib/pyroscope RUN apk add --no-cache ca-certificates bash tzdata openssl musl-utils -RUN apk add --no-cache bcc-tools python3 + RUN ln -s $(which python3) /usr/bin/python RUN addgroup -S pyroscope && adduser -S pyroscope -G pyroscope diff --git a/Makefile b/Makefile index c36c350694..6875faec5c 100644 --- a/Makefile +++ b/Makefile @@ -31,9 +31,21 @@ else endif EXTRA_GO_TAGS ?= +CGO_CFLAGS ?= +CGO_LDFLAGS ?= +EXTRA_CGO_CFLAGS ?= +EXTRA_CGO_LDFLAGS ?= GO_TAGS = $(ENABLED_SPIES)$(EXTRA_GO_TAGS) ALPINE_TAG = +ifneq (,$(findstring ebpfspy,$(GO_TAGS))) + EXTRA_CGO_CFLAGS := $(EXTRA_CGO_CFLAGS) -I$(abspath ./third_party/libbpf/lib/include) \ + -I$(abspath ./third_party/bcc/lib/include) + EXTRA_CGO_LDFLAGS := $(EXTRA_CGO_LDFLAGS) -L$(abspath ./third_party/libbpf/lib/lib64) -lbpf \ + -L$(abspath ./third_party/bcc/lib/lib) -lbcc-syms -lstdc++ -lelf -lz + THIRD_PARTY_DEPENDENCIES := $(THIRD_PARTY_DEPENDENCIES) build-profile-bpf build-bcc build-libbpf +endif + ifeq ("$(OS)", "Linux") ifeq ("$(shell cat /etc/os-release | grep ^ID=)", "ID=alpine") RUST_TARGET ?= "$(ARCH)-unknown-linux-musl" @@ -56,7 +68,7 @@ else endif EMBEDDED_ASSETS_DEPS ?= "assets-release" -EXTRA_LDFLAGS ?= "" +EXTRA_LDFLAGS ?= ifndef $(GOPATH) GOPATH=$(shell go env GOPATH || true) @@ -106,7 +118,9 @@ install-go-dependencies: ## installs golang dependencies .PHONY: build build: ## Builds the binary - $(GOBUILD) -tags "$(GO_TAGS)" -ldflags "$(EXTRA_LDFLAGS) $(shell scripts/generate-build-flags.sh)" -o ./bin/pyroscope ./cmd/pyroscope + CGO_CFLAGS="$(CGO_CFLAGS) $(EXTRA_CGO_CFLAGS)" \ + CGO_LDFLAGS="$(CGO_LDFLAGS) $(EXTRA_CGO_LDFLAGS)" \ + $(GOBUILD) -tags "$(GO_TAGS)" -ldflags "$(EXTRA_LDFLAGS) $(shell scripts/generate-build-flags.sh)" -o ./bin/pyroscope ./cmd/pyroscope .PHONY: build-release build-release: embedded-assets ## Builds the release build @@ -136,6 +150,19 @@ build-phpspy-dependencies: ## Builds the PHP dependency cd third_party/phpspy_src && USE_ZEND=1 make CFLAGS="-DUSE_DIRECT" || $(MAKE) print-deps-error-message cp third_party/phpspy_src/libphpspy.a third_party/phpspy/libphpspy.a +.PHONY: build-libbpf +build-libbpf: + $(MAKE) -C third_party/libbpf + +.PHONY: build-bcc +build-bcc: + $(MAKE) -C third_party/bcc + +.PHONY: build-profile-bpf +build-profile-bpf: build-libbpf + CFLAGS="-I$(abspath ./third_party/libbpf/lib/include)" $(MAKE) -C pkg/agent/ebpfspy/bpf + + .PHONY: build-third-party-dependencies build-third-party-dependencies: $(shell echo $(THIRD_PARTY_DEPENDENCIES)) ## Builds third party dep @@ -236,6 +263,9 @@ go-deps-graph: ## Generate the deps graph .PHONY: clean clean: ## Clean up storage rm -rf tmp/pyroscope-storage + $(MAKE) -C third_party/bcc clean + $(MAKE) -C third_party/libbpf clean + $(MAKE) -C pkg/agent/ebpfspy/bpf clean .PHONY: update-contributors update-contributors: ## Update the contributors diff --git a/cmd/pyroscope/command/ebpf.go b/cmd/pyroscope/command/ebpf.go new file mode 100644 index 0000000000..cbfca8ac7d --- /dev/null +++ b/cmd/pyroscope/command/ebpf.go @@ -0,0 +1,27 @@ +//go:build ebpfspy + +package command + +import ( + "github.com/spf13/cobra" + + "github.com/pyroscope-io/pyroscope/pkg/cli" + "github.com/pyroscope-io/pyroscope/pkg/config" + "github.com/pyroscope-io/pyroscope/pkg/exec" +) + +func newEBPFSpyCmd(cfg *config.EBPF) *cobra.Command { + vpr := newViper() + connectCmd := &cobra.Command{ + Use: "ebpf [flags]", + Short: "Profile whole system using eBPF sampling profiler", + Args: cobra.NoArgs, + + RunE: cli.CreateCmdRunFn(cfg, vpr, func(_ *cobra.Command, _ []string) error { + return exec.RunEBPF(cfg) + }), + } + + cli.PopulateFlagSet(cfg, connectCmd.Flags(), vpr) + return connectCmd +} diff --git a/cmd/pyroscope/command/ebpf_stub.go b/cmd/pyroscope/command/ebpf_stub.go new file mode 100644 index 0000000000..cff2fd757a --- /dev/null +++ b/cmd/pyroscope/command/ebpf_stub.go @@ -0,0 +1,13 @@ +//go:build !ebpfspy + +package command + +import ( + "github.com/spf13/cobra" + + "github.com/pyroscope-io/pyroscope/pkg/config" +) + +func newEBPFSpyCmd(_ *config.EBPF) *cobra.Command { + return nil +} diff --git a/cmd/pyroscope/command/root.go b/cmd/pyroscope/command/root.go index b1aaecc1fb..18ef90457c 100644 --- a/cmd/pyroscope/command/root.go +++ b/cmd/pyroscope/command/root.go @@ -21,6 +21,7 @@ func Execute() error { newAdminCmd(&cfg.Admin), newAgentCmd(&cfg.Agent), newConnectCmd(&cfg.Connect), + newEBPFSpyCmd(&cfg.EBPF), newConvertCmd(&cfg.Convert), newDbManagerCmd(&config.CombinedDbManager{DbManager: &cfg.DbManager, Server: &cfg.Server}), newExecCmd(&cfg.Exec), @@ -29,6 +30,9 @@ func Execute() error { } for _, c := range subcommands { + if c == nil { + continue + } addHelpSubcommand(c) c.HasHelpSubCommands() rootCmd.AddCommand(c) diff --git a/go.mod b/go.mod index 517c987de0..df351a6f5b 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/pyroscope-io/pyroscope go 1.18 require ( + github.com/aquasecurity/libbpfgo v0.3.0-libbpf-0.8.0 github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d github.com/aws/aws-sdk-go v1.44.37 github.com/aybabtme/rgbterm v0.0.0-20170906152045-cc83f3b3ce59 diff --git a/go.sum b/go.sum index a470b6d3d3..3f9f2da1a5 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,8 @@ github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/aquasecurity/libbpfgo v0.3.0-libbpf-0.8.0 h1:NQEf484vQOshZwZOLTE7kzo62TvYrM906gUjlVg4D2k= +github.com/aquasecurity/libbpfgo v0.3.0-libbpf-0.8.0/go.mod h1:qu0TVGRvtNMFkuKLscJkY1FwmageNBLqeImAFslqPPc= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= @@ -1015,6 +1017,7 @@ golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/agent/ebpfspy/bpf/.gitignore b/pkg/agent/ebpfspy/bpf/.gitignore new file mode 100644 index 0000000000..2a9ccb0c3e --- /dev/null +++ b/pkg/agent/ebpfspy/bpf/.gitignore @@ -0,0 +1,5 @@ +libbpf* +bcc-src +libs/ +profile.bpf.o +vmlinux.h diff --git a/pkg/agent/ebpfspy/bpf/Makefile b/pkg/agent/ebpfspy/bpf/Makefile new file mode 100644 index 0000000000..9e9a40bad1 --- /dev/null +++ b/pkg/agent/ebpfspy/bpf/Makefile @@ -0,0 +1,14 @@ +CLANG ?= clang + +CFLAGS := $(CFLAGS) -ggdb -gdwarf -O2 -Wall -fpie -Wno-unused-variable -Wno-unused-function + + +profile.bpf.o: profile.bpf.c profile.bpf.h vmlinux.h + $(CLANG) $(CFLAGS) -target bpf -D__TARGET_ARCH_x86 -I. -c profile.bpf.c -o $@ + +vmlinux.h: + wget https://raw.githubusercontent.com/iovisor/bcc/798a1056903b57687fd9d30a43c64c7a4402934c/libbpf-tools/x86/vmlinux_518.h -O $@ + +.PHONY: clean +clean: + rm -rf profile.bpf.o vmlinux.h diff --git a/pkg/agent/ebpfspy/bpf/profile.bpf.c b/pkg/agent/ebpfspy/bpf/profile.bpf.c new file mode 100644 index 0000000000..3acf7ef9be --- /dev/null +++ b/pkg/agent/ebpfspy/bpf/profile.bpf.c @@ -0,0 +1,65 @@ +#include "vmlinux.h" +#include +#include +#include "profile.bpf.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct profile_key_t); + __type(value, u32); + __uint(max_entries, PROFILE_MAPS_SIZE); +} counts SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(u32)); + __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); + __uint(max_entries, PROFILE_MAPS_SIZE); +} stacks SEC(".maps"); + + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, struct profile_bss_args_t); + __uint(max_entries, 1); +} args SEC(".maps"); + +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) +#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK) + + + +SEC("perf_event") +int do_perf_event(struct bpf_perf_event_data *ctx) +{ + u64 id = bpf_get_current_pid_tgid(); + u32 tgid = id >> 32; + u32 pid = id; + struct profile_key_t key = { .pid = tgid }; + u32 *val, one = 1, zero = 0; + struct profile_bss_args_t *arg = bpf_map_lookup_elem(&args, &zero); + if (!arg) { + return 0; + } + if (pid == 0) { + return 0; + } + if (arg->tgid_filter != 0 && tgid != arg->tgid_filter) { + return 0; + } + + bpf_get_current_comm(&key.comm, sizeof(key.comm)); + + key.kern_stack = bpf_get_stackid(ctx, &stacks, KERN_STACKID_FLAGS); + key.user_stack = bpf_get_stackid(ctx, &stacks, USER_STACKID_FLAGS); + + val = bpf_map_lookup_elem(&counts, &key); + if (val) + (*val)++; + else + bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST); + return 0; +} + +char _license[] SEC("license") = "GPL"; //todo diff --git a/pkg/agent/ebpfspy/bpf/profile.bpf.h b/pkg/agent/ebpfspy/bpf/profile.bpf.h new file mode 100644 index 0000000000..41931e6708 --- /dev/null +++ b/pkg/agent/ebpfspy/bpf/profile.bpf.h @@ -0,0 +1,15 @@ + +#define PERF_MAX_STACK_DEPTH 127 +#define PROFILE_MAPS_SIZE 16384 + + +struct profile_key_t { + __u32 pid; + __s64 kern_stack; + __s64 user_stack; + char comm[16]; +}; + +struct profile_bss_args_t { + __u32 tgid_filter; // 0 => profile everything +}; diff --git a/pkg/agent/ebpfspy/cpuonline/cpuonline.go b/pkg/agent/ebpfspy/cpuonline/cpuonline.go new file mode 100644 index 0000000000..9d6f4c6fcc --- /dev/null +++ b/pkg/agent/ebpfspy/cpuonline/cpuonline.go @@ -0,0 +1,43 @@ +package cpuonline + +import ( + "io/ioutil" + "strconv" + "strings" +) + +const cpuOnline = "/sys/devices/system/cpu/online" + +// Get returns a slice with the online CPUs, for example `[0, 2, 3]` +func Get() ([]uint, error) { + buf, err := ioutil.ReadFile(cpuOnline) + if err != nil { + return nil, err + } + return ReadCPURange(string(buf)) +} + +// loosely based on https://github.com/iovisor/bcc/blob/v0.3.0/src/python/bcc/utils.py#L15 +func ReadCPURange(cpuRangeStr string) ([]uint, error) { + var cpus []uint + cpuRangeStr = strings.Trim(cpuRangeStr, "\n ") + for _, cpuRange := range strings.Split(cpuRangeStr, ",") { + rangeOp := strings.SplitN(cpuRange, "-", 2) + first, err := strconv.ParseUint(rangeOp[0], 10, 32) + if err != nil { + return nil, err + } + if len(rangeOp) == 1 { + cpus = append(cpus, uint(first)) + continue + } + last, err := strconv.ParseUint(rangeOp[1], 10, 32) + if err != nil { + return nil, err + } + for n := first; n <= last; n++ { + cpus = append(cpus, uint(n)) + } + } + return cpus, nil +} diff --git a/pkg/agent/ebpfspy/ebpfspy_linux.go b/pkg/agent/ebpfspy/ebpfspy_linux.go index c06d6eb31b..58bcb08646 100644 --- a/pkg/agent/ebpfspy/ebpfspy_linux.go +++ b/pkg/agent/ebpfspy/ebpfspy_linux.go @@ -1,69 +1,71 @@ //go:build ebpfspy -// +build ebpfspy -// Package ebpfspy provides integration with Linux eBPF. It calls profile.py from BCC tools: -// https://github.com/iovisor/bcc/blob/master/tools/profile.py -// TODO: At some point we might extract the part that starts another process because it has good potential to be reused by similar profiling tools. package ebpfspy import ( - "sync" - + "github.com/pyroscope-io/pyroscope/pkg/agent/ebpfspy/sd" "github.com/pyroscope-io/pyroscope/pkg/agent/spy" + "sync" ) type EbpfSpy struct { - resetMutex sync.Mutex - reset bool - stop bool + mutex sync.Mutex + reset bool + stop bool + stopCh chan struct{} - profilingSession *session + session *Session +} - stopCh chan struct{} +func NewEBPFSpy(s *Session) *EbpfSpy { + return &EbpfSpy{ + session: s, + stopCh: make(chan struct{}), + } } -func Start(pid int, _ spy.ProfileType, _ uint32, _ bool) (spy.Spy, error) { - s := newSession(pid) +func Start(pid int, _ spy.ProfileType, sampleRate uint32, _ bool) (spy.Spy, error) { + s := NewSession(pid, sampleRate, 256, sd.NoopServiceDiscovery{}, false) err := s.Start() if err != nil { return nil, err } - return &EbpfSpy{ - profilingSession: s, - stopCh: make(chan struct{}), - }, nil -} - -func (s *EbpfSpy) Stop() error { - s.stop = true - <-s.stopCh - return nil + return NewEBPFSpy(s), nil } func (s *EbpfSpy) Snapshot(cb func(*spy.Labels, []byte, uint64) error) error { - s.resetMutex.Lock() - defer s.resetMutex.Unlock() + s.mutex.Lock() + defer s.mutex.Unlock() if !s.reset { return nil } s.reset = false - err := s.profilingSession.Reset(func(name []byte, v uint64) error { - return cb(nil, name, v) + + err := s.session.Reset(func(labels *spy.Labels, name []byte, v uint64, pid uint32) error { + return cb(labels, name, v) }) if s.stop { + s.session.Stop() s.stopCh <- struct{}{} - s.profilingSession.Stop() } return err } +func (s *EbpfSpy) Stop() error { + s.mutex.Lock() + s.stop = true + s.mutex.Unlock() + <-s.stopCh + return nil +} + func (s *EbpfSpy) Reset() { - s.resetMutex.Lock() - defer s.resetMutex.Unlock() + s.mutex.Lock() + defer s.mutex.Unlock() s.reset = true } diff --git a/pkg/agent/ebpfspy/sd/k8s.go b/pkg/agent/ebpfspy/sd/k8s.go new file mode 100644 index 0000000000..50cbff151e --- /dev/null +++ b/pkg/agent/ebpfspy/sd/k8s.go @@ -0,0 +1,163 @@ +package sd + +import ( + "bufio" + "context" + "fmt" + "github.com/pyroscope-io/pyroscope/pkg/agent/spy" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "os" + "regexp" + "strings" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type K8SServiceDiscovery struct { + cs *kubernetes.Clientset + nodeName string + containerID2Labels map[string]*spy.Labels + pid2Labels map[uint32]*spy.Labels +} + +var knownContainerIDPrefixes = []string{"docker://", "containerd://"} +var knownRuntimes = []string{"docker://", "containerd://"} + +func NewK8ServiceDiscovery(ctx context.Context, nodeName string) (ServiceDiscovery, error) { + config, err := rest.InClusterConfig() + if err != nil { + return nil, err + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, err + } + + node, err := clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) + if err != nil { + return nil, err + } + criVersion := node.Status.NodeInfo.ContainerRuntimeVersion + + if !isKnownContainerRuntime(criVersion) { + return nil, fmt.Errorf("unknown cri %s", criVersion) + } + + return &K8SServiceDiscovery{ + cs: clientset, + nodeName: nodeName, + containerID2Labels: map[string]*spy.Labels{}, + pid2Labels: map[uint32]*spy.Labels{}, + }, nil +} + +func (sd *K8SServiceDiscovery) Refresh(ctx context.Context) error { + sd.containerID2Labels = map[string]*spy.Labels{} + sd.pid2Labels = map[uint32]*spy.Labels{} + pods, err := sd.cs.CoreV1().Pods("").List(ctx, metav1.ListOptions{ + FieldSelector: fields.OneTermEqualSelector("spec.nodeName", sd.nodeName).String(), + }) + if err != nil { + return err + } + + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + cid, err := getContainerIDFromK8S(status.ContainerID) + if err != nil { + return err + } + ls := spy.NewLabels() + ls.Set("k8s_node", sd.nodeName) + ls.Set("k8s_pod_name", pod.Name) + ls.Set("k8s_pod_namespace", pod.Namespace) + ls.Set("k8s_container_id", cid) + ls.Set("k8s_container_name", status.Name) + if v, ok := pod.Labels["app.kubernetes.io/name"]; ok { + ls.Set("k8s_app_name", v) + } + if v, ok := pod.Labels["app.kubernetes.io/version"]; ok { + ls.Set("k8s_app_version", v) + } + sd.containerID2Labels[cid] = ls + } + } + return nil +} + +func (sd *K8SServiceDiscovery) GetLabels(pid uint32) *spy.Labels { + ls, ok := sd.pid2Labels[pid] + if ok { + return ls + } + cid := getContainerIDFromPID(pid) + + if cid == "" { + sd.pid2Labels[pid] = nil + return nil + } + ls, ok = sd.containerID2Labels[cid] + sd.pid2Labels[pid] = ls + return ls +} + +func isKnownContainerRuntime(criVersion string) bool { + for _, runtime := range knownRuntimes { + if strings.HasPrefix(criVersion, runtime) { + return true + } + } + return false +} + +func getContainerIDFromK8S(k8sContainerID string) (string, error) { + for _, p := range knownContainerIDPrefixes { + if strings.HasPrefix(k8sContainerID, p) { + return strings.TrimPrefix(k8sContainerID, p), nil + } + } + return "", fmt.Errorf("unknown container id %s", k8sContainerID) +} + +func getContainerIDFromPID(pid uint32) string { + f, err := os.Open(fmt.Sprintf("/proc/%d/cgroup", pid)) + if err != nil { + return "" + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + cid := getContainerIDFromCGroup(line) + if cid != "" { + return cid + } + } + return "" +} + +func getContainerIDFromCGroup(line string) string { + parts := dockerPattern.FindStringSubmatch(line) + if parts != nil { + return parts[1] + } + parts = kubePattern.FindStringSubmatch(line) + if parts != nil { + return parts[1] + } + parts = cgroupV2ScopePattern.FindStringSubmatch(line) + if parts != nil { + return parts[1] + } + return "" +} + +var ( + kubePattern = regexp.MustCompile(`\d+:.+:/kubepods/[^/]+/pod[^/]+/([0-9a-f]{64})`) + dockerPattern = regexp.MustCompile(`\d+:.+:/docker/pod[^/]+/([0-9a-f]{64})`) + cgroupV2ScopePattern = regexp.MustCompile(`^0::.*/(?:docker-|cri-containerd-)([0-9a-f]{64})\.scope$`) +) diff --git a/pkg/agent/ebpfspy/sd/k8s_test.go b/pkg/agent/ebpfspy/sd/k8s_test.go new file mode 100644 index 0000000000..4e71489ad9 --- /dev/null +++ b/pkg/agent/ebpfspy/sd/k8s_test.go @@ -0,0 +1,34 @@ +package sd + +import "testing" + +func TestKubePodsCgroupsV1(t *testing.T) { + cg := "11:devices:/kubepods/besteffort/pod85adbef3-622f-4ef2-8f60-a8bdf3eb6c72/" + + "7edda1de1e0d1d366351e478359cf5fa16bb8ab53063a99bb119e56971bfb7e2" + cid := getContainerIDFromCGroup(cg) + expected := "7edda1de1e0d1d366351e478359cf5fa16bb8ab53063a99bb119e56971bfb7e2" + if cid != expected { + t.Fatalf("wrong cid %s != %s", cid, expected) + } +} + +func TestContainerdCgroupsV2(t *testing.T) { + cg := "0::/kubepods.slice/kubepods-burstable.slice/" + + "kubepods-burstable-podf9a04ecc_1875_491b_926c_d2f64757704e.slice/" + + "cri-containerd-47e320f795efcec1ecf2001c3a09c95e3701ed87de8256837b70b10e23818251.scope" + cid := getContainerIDFromCGroup(cg) + expected := "47e320f795efcec1ecf2001c3a09c95e3701ed87de8256837b70b10e23818251" + if cid != expected { + t.Fatalf("wrong cid %s != %s", cid, expected) + } +} + +func TestDockerCgroupsV2(t *testing.T) { + cg := "0::/../../kubepods-besteffort-pod88f6f4e3_59c0_4ce8_9ecf_391c8b5a60ad.slice/" + + "docker-656959d9ee87a0b131c601ce9d9f8f76b1dda60e8608c503b5979d849cbdc714.scope" + cid := getContainerIDFromCGroup(cg) + expected := "656959d9ee87a0b131c601ce9d9f8f76b1dda60e8608c503b5979d849cbdc714" + if cid != expected { + t.Fatalf("wrong cid %s != %s", cid, expected) + } +} diff --git a/pkg/agent/ebpfspy/sd/sd.go b/pkg/agent/ebpfspy/sd/sd.go new file mode 100644 index 0000000000..9dfec93ae3 --- /dev/null +++ b/pkg/agent/ebpfspy/sd/sd.go @@ -0,0 +1,25 @@ +package sd + +import ( + "context" + "github.com/pyroscope-io/pyroscope/pkg/agent/spy" +) + +type ServiceDiscovery interface { + // Refresh called every 10s before session reset + Refresh(ctx context.Context) error + + // GetLabels may return nil + GetLabels(pid uint32) *spy.Labels +} + +type NoopServiceDiscovery struct { +} + +func (NoopServiceDiscovery) Refresh(_ context.Context) error { + return nil +} + +func (NoopServiceDiscovery) GetLabels(_ uint32) *spy.Labels { + return nil +} diff --git a/pkg/agent/ebpfspy/session_linux.go b/pkg/agent/ebpfspy/session_linux.go index ad43380b03..4dc73ba822 100644 --- a/pkg/agent/ebpfspy/session_linux.go +++ b/pkg/agent/ebpfspy/session_linux.go @@ -1,143 +1,291 @@ //go:build ebpfspy -// +build ebpfspy -// Package ebpfspy provides integration with Linux eBPF. +// Package ebpfspy provides integration with Linux eBPF. It is a rough copy of profile.py from BCC tools: +// +// https://github.com/iovisor/bcc/blob/master/tools/profile.py package ebpfspy +import "C" import ( + "bytes" + "context" + _ "embed" + "encoding/binary" "fmt" - "io" - "os/exec" - "strconv" - "strings" + "github.com/pyroscope-io/pyroscope/pkg/agent/ebpfspy/cpuonline" + "github.com/pyroscope-io/pyroscope/pkg/agent/ebpfspy/sd" + "github.com/pyroscope-io/pyroscope/pkg/agent/spy" + "golang.org/x/sys/unix" "sync" "syscall" + "unsafe" - "github.com/hashicorp/go-multierror" - - "github.com/pyroscope-io/pyroscope/pkg/convert" - "github.com/pyroscope-io/pyroscope/pkg/util/file" + bpf "github.com/aquasecurity/libbpfgo" ) -type line struct { - name []byte - val int -} +//#cgo CFLAGS: -I./bpf/ +//#include +//#include "profile.bpf.h" +import "C" -type session struct { - pid int +type Session struct { + pid int + sampleRate uint32 + symbolCacheSize int + serviceDiscovery sd.ServiceDiscovery + onlyServices bool - cmd *exec.Cmd - ch chan line + perfEventFds []int - stopMutex sync.Mutex - stop bool + symCache *symbolCache - stderr io.ReadCloser -} + module *bpf.Module + mapCounts *bpf.BPFMap + mapStacks *bpf.BPFMap + mapArgs *bpf.BPFMap + prog *bpf.BPFProg + link *bpf.BPFLink -const helpURL = "https://github.com/iovisor/bcc/blob/master/INSTALL.md" + modMutex sync.Mutex -var possibleCommandLocations = []string{ - "/usr/sbin/profile-bpfcc", // debian: https://github.com/pyroscope-io/pyroscope/issues/114 - "/usr/share/bcc/tools/profile", + roundNumber int } -// TODO: make these configurable -var commandArgs = []string{"-F", "100", "-f", "11"} +const btf = "should not be used" // canary to detect we got relocations -func newSession(pid int) *session { - return &session{pid: pid} -} - -func findSuitableExecutable() (string, error) { - for _, str := range possibleCommandLocations { - if file.Exists(str) { - return str, nil - } +func NewSession(pid int, sampleRate uint32, symbolCacheSize int, serviceDiscovery sd.ServiceDiscovery, onlyServices bool) *Session { + return &Session{ + pid: pid, + sampleRate: sampleRate, + symbolCacheSize: symbolCacheSize, + serviceDiscovery: serviceDiscovery, + onlyServices: onlyServices, } - return "", fmt.Errorf("Could not find profile.py at %s. Visit %s for instructions on how to install it", strings.Join(possibleCommandLocations, ", "), helpURL) } -func (s *session) Start() error { - command, err := findSuitableExecutable() - if err != nil { +func (s *Session) Start() error { + var err error + if err = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &unix.Rlimit{ + Cur: unix.RLIM_INFINITY, + Max: unix.RLIM_INFINITY, + }); err != nil { return err } - args := commandArgs - if s.pid != -1 { - args = append(commandArgs, "-p", strconv.Itoa(s.pid)) - } + s.modMutex.Lock() + defer s.modMutex.Unlock() - s.cmd = exec.Command(command, args...) - stdout, err := s.cmd.StdoutPipe() - if err != nil { + if s.symCache, err = newSymbolCache(s.symbolCacheSize); err != nil { return err } - s.stderr, err = s.cmd.StderrPipe() - if err != nil { + args := bpf.NewModuleArgs{BPFObjBuff: profileBpf, + BTFObjPath: btf} + if s.module, err = bpf.NewModuleFromBufferArgs(args); err != nil { return err } + if err = s.module.BPFLoadObject(); err != nil { + return err + } + if s.prog, err = s.module.GetProgram("do_perf_event"); err != nil { + return err + } + if err = s.findMaps(); err != nil { + return err + } + if err = s.initArgs(); err != nil { + return err + } + if err = s.attachPerfEvent(); err != nil { + return err + } + return nil +} + +func (s *Session) Reset(cb func(labels *spy.Labels, name []byte, value uint64, pid uint32) error) error { + s.modMutex.Lock() + defer s.modMutex.Unlock() - s.ch = make(chan line) + s.roundNumber += 1 + refreshResult := make(chan error) go func() { - convert.ParseGroups(stdout, func(name []byte, val int) { - s.ch <- line{ - name: name, - val: val, - } - }) - stdout.Close() - close(s.ch) + refreshResult <- s.serviceDiscovery.Refresh(context.TODO()) }() - err = s.cmd.Start() - return err -} + keys, values, batch, err := s.getCountsMapValues() + if err != nil { + return err + } -func (s *session) Reset(cb func([]byte, uint64) error) error { - var errs error - s.cmd.Process.Signal(syscall.SIGINT) - type stderrRes struct { - bs []byte - err error + err = <-refreshResult + if err != nil { + return err } - stderrCh := make(chan stderrRes) - go func() { - bs, err := io.ReadAll(s.stderr) - stderrCh <- stderrRes{bs, err} - }() - for v := range s.ch { - if err := cb(v.name, uint64(v.val)); err != nil { - errs = multierror.Append(errs, err) + + type sf struct { + pid uint32 + count uint32 + kStack []byte + uStack []byte + comm string + labels *spy.Labels + } + var sfs []sf + knownStacks := map[uint32]bool{} + for i, key := range keys { + ck := (*C.struct_profile_key_t)(unsafe.Pointer(&key[0])) + value := values[i] + + pid := uint32(ck.pid) + kStackID := int64(ck.kern_stack) + uStackID := int64(ck.user_stack) + count := binary.LittleEndian.Uint32(value) + var comm string = C.GoString(&ck.comm[0]) + if uStackID >= 0 { + knownStacks[uint32(uStackID)] = true + } + if kStackID >= 0 { + knownStacks[uint32(kStackID)] = true + } + labels := s.serviceDiscovery.GetLabels(pid) + if labels == nil && s.onlyServices { + fmt.Printf("skipping %d %s\n", pid, comm) + continue } + uStack := s.getStack(uStackID) + kStack := s.getStack(kStackID) + sfs = append(sfs, sf{pid: pid, uStack: uStack, kStack: kStack, count: count, comm: comm, labels: labels}) } - stderr := <-stderrCh - if stderr.err != nil { - errs = multierror.Append(errs, stderr.err) + for _, it := range sfs { + buf := bytes.NewBuffer(nil) + buf.Write([]byte(it.comm)) + buf.Write([]byte{';'}) + s.walkStack(buf, it.uStack, it.pid, true) + s.walkStack(buf, it.kStack, it.pid, false) + + err = cb(it.labels, buf.Bytes(), uint64(it.count), it.pid) + if err != nil { + return err + } } - if err := s.cmd.Wait(); err != nil { - errs = multierror.Append(errs, fmt.Errorf("%s: %w", string(stderr.bs), err)) + if err = s.clearCountsMap(keys, batch); err != nil { + return err + } + if err = s.clearStacksMap(knownStacks); err != nil { + return err } + return nil +} - s.stopMutex.Lock() - defer s.stopMutex.Unlock() +func (s *Session) Stop() { + s.symCache.clear() + for fd := range s.perfEventFds { + _ = syscall.Close(fd) + } + s.module.Close() +} - if s.stop { - return errs +func (s *Session) findMaps() error { + var err error + if s.mapArgs, err = s.module.GetMap("args"); err != nil { + return err } - if err := s.Start(); err != nil { - errs = multierror.Append(errs, err) + if s.mapCounts, err = s.module.GetMap("counts"); err != nil { + return err + } + if s.mapStacks, err = s.module.GetMap("stacks"); err != nil { + return err } - return errs + return nil +} +func (s *Session) initArgs() error { + var zero uint32 + var err error + var tgidFilter uint32 + if s.pid <= 0 { + tgidFilter = 0 + } else { + tgidFilter = uint32(s.pid) + } + args := C.struct_profile_bss_args_t{ + tgid_filter: C.uint(tgidFilter), + } + err = s.mapArgs.UpdateValueFlags(unsafe.Pointer(&zero), unsafe.Pointer(&args), 0) + if err != nil { + return err + } + return nil } -func (s *session) Stop() error { - s.stopMutex.Lock() - defer s.stopMutex.Unlock() - - s.stop = true +func (s *Session) attachPerfEvent() error { + var cpus []uint + var err error + if cpus, err = cpuonline.Get(); err != nil { + return err + } + for _, cpu := range cpus { + attr := unix.PerfEventAttr{ + Type: unix.PERF_TYPE_SOFTWARE, + Config: unix.PERF_COUNT_SW_CPU_CLOCK, + Bits: unix.PerfBitFreq, + Sample: uint64(s.sampleRate), + } + fd, err := unix.PerfEventOpen(&attr, -1, int(cpu), -1, unix.PERF_FLAG_FD_CLOEXEC) + if err != nil { + return err + } + s.perfEventFds = append(s.perfEventFds, fd) + if _, err = s.prog.AttachPerfEvent(fd); err != nil { + return err + } + } return nil } + +func (s *Session) getStack(stackId int64) []byte { + if stackId < 0 { + return nil + } + stackIdU32 := uint32(stackId) + key := unsafe.Pointer(&stackIdU32) + stack, err := s.mapStacks.GetValue(key) + if err != nil { + return nil + } + return stack + +} +func (s *Session) walkStack(line *bytes.Buffer, stack []byte, pid uint32, userspace bool) { + if len(stack) == 0 { + return + } + var stackFrames []string + for i := 0; i < 127; i++ { + it := stack[i*8 : i*8+8] + ip := binary.LittleEndian.Uint64(it) + if ip == 0 { + break + } + sym, _, _ := s.symCache.bccResolve(pid, ip, s.roundNumber) + if !userspace && sym == "" { + continue + } + if sym == "" { + sym = symbolUnknown + } + stackFrames = append(stackFrames, sym+";") + } + reverse(stackFrames) + for _, s := range stackFrames { + line.Write([]byte(s)) + } +} + +func reverse(s []string) { + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] + } +} + +//go:embed bpf/profile.bpf.o +var profileBpf []byte diff --git a/pkg/agent/ebpfspy/session_maps_linux.go b/pkg/agent/ebpfspy/session_maps_linux.go new file mode 100644 index 0000000000..80b9c471af --- /dev/null +++ b/pkg/agent/ebpfspy/session_maps_linux.go @@ -0,0 +1,109 @@ +//go:build ebpfspy +// +build ebpfspy + +// Package ebpfspy provides integration with Linux eBPF. It is a rough copy of profile.py from BCC tools: +// +// https://github.com/iovisor/bcc/blob/master/tools/profile.py +package ebpfspy + +import ( + "unsafe" +) + +//#cgo CFLAGS: -I./bpf/ +//#include +//#include "profile.bpf.h" +import "C" + +func (s *Session) getCountsMapValues() (keys [][]byte, values [][]byte, batch bool, err error) { + // try lookup_and_delete_batch + var ( + mapSize = C.PROFILE_MAPS_SIZE + keySize = int(unsafe.Sizeof(C.struct_profile_key_t{})) + allKeys = make([]byte, mapSize*keySize) + pKeys = unsafe.Pointer(&allKeys[0]) + nextKey = C.struct_profile_key_t{} + ) + values, err = s.mapCounts.GetValueAndDeleteBatch(pKeys, nil, unsafe.Pointer(&nextKey), uint32(mapSize)) + if len(values) > 0 { + keys = collectBatchValues(allKeys, len(values), keySize) + return keys, values, true, nil + } + + // batch failed or unsupported or just unlucky and got 0 stack-traces + // try iterating + it := s.mapCounts.Iterator() + for it.Next() { + key := it.Key() + v, err := s.mapCounts.GetValue(unsafe.Pointer(&key[0])) + if err != nil { + return nil, nil, false, err + } + keyCopy := make([]byte, len(key)) // The slice is valid only until the next call to Next. + copy(keyCopy, key) + keys = append(keys, keyCopy) + values = append(values, v) + } + return keys, values, false, nil +} + +func (s *Session) clearCountsMap(keys [][]byte, batch bool) error { + if len(keys) == 0 { + return nil + } + if batch { + // do nothing, already deleted with GetValueAndDeleteBatch in getCountsMapValues + return nil + } + for _, key := range keys { + err := s.mapCounts.DeleteKey(unsafe.Pointer(&key[0])) + if err != nil { + return err + } + } + return nil +} + +func (s *Session) clearStacksMap(knownKeys map[uint32]bool) error { + m := s.mapStacks + cnt := 0 + errs := 0 + if s.roundNumber%10 == 0 { + // do a full reset once in a while + it := m.Iterator() + var keys [][]byte + for it.Next() { + key := it.Key() + keyCopy := make([]byte, len(key)) // The slice is valid only until the next call to Next. + copy(keyCopy, key) + keys = append(keys, keyCopy) + } + for _, key := range keys { + if err := m.DeleteKey(unsafe.Pointer(&key[0])); err != nil { + errs += 1 + } else { + cnt += 1 + } + } + return nil + } + for stackId := range knownKeys { + k := stackId + if err := m.DeleteKey(unsafe.Pointer(&k)); err != nil { + errs += 1 + } else { + cnt += 1 + } + } + return nil +} + +func collectBatchValues(values []byte, count int, valueSize int) [][]byte { + var value []byte + var collected [][]byte + for i := 0; i < count*valueSize; i += valueSize { + value = values[i : i+valueSize] + collected = append(collected, value) + } + return collected +} diff --git a/pkg/agent/ebpfspy/symbols_linux.go b/pkg/agent/ebpfspy/symbols_linux.go new file mode 100644 index 0000000000..08041ce4fa --- /dev/null +++ b/pkg/agent/ebpfspy/symbols_linux.go @@ -0,0 +1,100 @@ +//go:build ebpfspy +// +build ebpfspy + +// Package ebpfspy provides integration with Linux eBPF. It is a rough copy of profile.py from BCC tools: +// +// https://github.com/iovisor/bcc/blob/master/tools/profile.py +package ebpfspy + +import ( + "debug/elf" + "github.com/pyroscope-io/pyroscope/pkg/util/genericlru" + "sync" + "unsafe" +) + +/* +#include "bcc_syms/bcc_syms.h" +*/ +import "C" + +const symbolUnknown = "[unknown]" + +type symbolCacheEntry struct { + cache unsafe.Pointer + roundNumber int +} +type pidKey uint32 + +type symbolCache struct { + pid2Cache *genericlru.GenericLRU[pidKey, symbolCacheEntry] + mutex sync.Mutex +} + +func newSymbolCache(cacheSize int) (*symbolCache, error) { + pid2Cache, err := genericlru.NewGenericLRU[pidKey, symbolCacheEntry](cacheSize, func(pid pidKey, e *symbolCacheEntry) { + C.bcc_free_symcache(e.cache, C.int(pid)) + }) + if err != nil { + return nil, err + } + return &symbolCache{ + pid2Cache: pid2Cache, + }, nil +} + +func (sc *symbolCache) bccResolve(pid uint32, addr uint64, roundNumber int) (string, uint64, string) { + symbol := C.struct_bcc_symbol{} + var symbolC = (*C.struct_bcc_symbol)(unsafe.Pointer(&symbol)) + e := sc.getOrCreateCacheEntry(pidKey(pid)) + staleCheck := false + if roundNumber != e.roundNumber { + e.roundNumber = roundNumber + staleCheck = true + } + var res C.int + if pid == 0 { + res = C.bcc_symcache_resolve_no_demangle(e.cache, C.ulong(addr), symbolC, C.bool(staleCheck)) + } else { + res = C.bcc_symcache_resolve(e.cache, C.ulong(addr), symbolC, C.bool(staleCheck)) + defer C.bcc_symbol_free_demangle_name(symbolC) + } + + if res < 0 { + if symbol.offset > 0 { + return "", uint64(symbol.offset), C.GoString(symbol.module) + } + return "", addr, "" + } + if pid == 0 { + return C.GoString(symbol.name), uint64(symbol.offset), C.GoString(symbol.module) + } else { + return C.GoString(symbol.demangle_name), uint64(symbol.offset), C.GoString(symbol.module) + } +} + +func (sc *symbolCache) getOrCreateCacheEntry(pid pidKey) *symbolCacheEntry { + sc.mutex.Lock() + defer sc.mutex.Unlock() + if cache, ok := sc.pid2Cache.Get(pid); ok { + return cache + } + pidC := C.int(pid) + if pid == 0 { + pidC = C.int(-1) // for KSyms + } + symbolOpt := C.struct_bcc_symbol_option{use_symbol_type: C.uint(1 << elf.STT_FUNC)} + symbolOptC := (*C.struct_bcc_symbol_option)(unsafe.Pointer(&symbolOpt)) + cache := C.bcc_symcache_new(pidC, symbolOptC) + e := &symbolCacheEntry{cache: cache} + sc.pid2Cache.Add(pid, e) + return e +} + +func (sc *symbolCache) clear() { + sc.mutex.Lock() + defer sc.mutex.Unlock() + for _, pid := range sc.pid2Cache.Keys() { + sc.pid2Cache.Remove(pid) + } +} diff --git a/pkg/agent/session.go b/pkg/agent/session.go index 003943ce6e..d9a8c9326a 100644 --- a/pkg/agent/session.go +++ b/pkg/agent/session.go @@ -67,6 +67,7 @@ type ProfileSession struct { disableGCRuns bool withSubprocesses bool clibIntegration bool + spyFactory SpyFactory noForkDetection bool pid int @@ -88,6 +89,8 @@ type ProfileSession struct { tries map[string][]*transporttrie.Trie } +type SpyFactory func(pid int) ([]spy.Spy, error) + type SessionConfig struct { upstream.Upstream Logger @@ -104,6 +107,10 @@ type SessionConfig struct { } func NewSession(c SessionConfig) (*ProfileSession, error) { + return NewSessionWithSpyFactory(c, NewGenericSpyFactory(c)) +} + +func NewSessionWithSpyFactory(c SessionConfig, spyFactory SpyFactory) (*ProfileSession, error) { appName, err := mergeTagsWithAppName(c.AppName, c.Tags) if err != nil { return nil, err @@ -124,6 +131,7 @@ func NewSession(c SessionConfig) (*ProfileSession, error) { clibIntegration: c.ClibIntegration, logger: c.Logger, throttler: throttle.New(errorThrottlerPeriod), + spyFactory: spyFactory, // string is appName, int is index in pids previousTries: make(map[string][]*transporttrie.Trie), @@ -135,6 +143,27 @@ func NewSession(c SessionConfig) (*ProfileSession, error) { return ps, nil } +func NewGenericSpyFactory(c SessionConfig) SpyFactory { + return func(pid int) ([]spy.Spy, error) { + var res []spy.Spy + + sf, err := spy.StartFunc(c.SpyName) + if err != nil { + return res, err + } + + for _, pt := range c.ProfilingTypes { + s, err := sf(pid, pt, c.SampleRate, c.DisableGCRuns) + + if err != nil { + return res, err + } + res = append(res, s) + } + return res, nil + } +} + func addSuffix(name string, ptype spy.ProfileType) (string, error) { k, err := segment.ParseKey(name) if err != nil { @@ -256,22 +285,7 @@ func (ps *ProfileSession) takeSnapshots() { } func (ps *ProfileSession) initializeSpies(pid int) ([]spy.Spy, error) { - res := []spy.Spy{} - - sf, err := spy.StartFunc(ps.spyName) - if err != nil { - return res, err - } - - for _, pt := range ps.profileTypes { - s, err := sf(pid, pt, ps.sampleRate, ps.disableGCRuns) - - if err != nil { - return res, err - } - res = append(res, s) - } - return res, nil + return ps.spyFactory(pid) } func (ps *ProfileSession) ChangeName(newName string) error { diff --git a/pkg/agent/spy/spy.go b/pkg/agent/spy/spy.go index 5116f85276..c0366d8d00 100644 --- a/pkg/agent/spy/spy.go +++ b/pkg/agent/spy/spy.go @@ -26,6 +26,7 @@ const ( ProfileAllocSpace ProfileType = "alloc_space" Go = "gospy" + EBPF = "ebpfspy" Python = "pyspy" Ruby = "rbspy" ) diff --git a/pkg/config/config.go b/pkg/config/config.go index 670e64f48f..fd68495ca0 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -24,6 +24,7 @@ type Config struct { Convert Convert `skip:"true" mapstructure:",squash"` Exec Exec `skip:"true" mapstructure:",squash"` Connect Connect `skip:"true" mapstructure:",squash"` + EBPF EBPF `skip:"true" mapstructure:",squash"` DbManager DbManager `skip:"true" mapstructure:",squash"` Admin Admin `skip:"true" mapstructure:",squash"` Adhoc Adhoc `skip:"true" mapstructure:",squash"` @@ -325,6 +326,29 @@ type Connect struct { Pid int `def:"0" desc:"PID of the process you want to profile. Pass -1 to profile the whole system (only supported by ebpfspy)" mapstructure:"pid"` } +type EBPF struct { + LogLevel string `def:"info" desc:"log level: debug|info|warn|error" mapstructure:"log-level"` + NoLogging bool `def:"false" desc:"disables logging from pyroscope" mapstructure:"no-logging"` + + // Spy configuration + ApplicationName string `def:"" desc:"application name used when uploading profiling data" mapstructure:"application-name"` + SampleRate uint `def:"100" desc:"sample rate for the profiler in Hz. 100 means reading 100 times per second" mapstructure:"sample-rate"` + + // Remote upstream configuration + ServerAddress string `def:"http://localhost:4040" desc:"address of the pyroscope server" mapstructure:"server-address"` + AuthToken string `def:"" desc:"authorization token used to upload profiling data" mapstructure:"auth-token"` + UpstreamThreads int `def:"4" desc:"number of upload threads" mapstructure:"upstream-threads"` + UpstreamRequestTimeout time.Duration `def:"10s" desc:"profile upload timeout" mapstructure:"upstream-request-timeout"` + + Tags map[string]string `name:"tag" def:"" desc:"tag in key=value form. The flag may be specified multiple times" mapstructure:"tags"` + + Pid int `def:"-1" desc:"PID of the process you want to profile. Pass -1 to profile the whole system" mapstructure:"pid"` + DetectSubprocesses bool `def:"false" desc:"makes pyroscope keep track of and profile subprocesses of the main process" mapstructure:"detect-subprocesses"` + SymbolCacheSize int `def:"256" desc:"max size of symbols cache (1 entry per process)" mapstructure:"symbol-cache-size"` + KubernetesNode string `def:"" desc:"Set to current k8s Node.nodeName for service discovery and labeling" mapstructure:"kubernetes-node"` + OnlyServices bool `def:"false" desc:"Ignore processes unknown to service discovery" mapstructure:"only-services"` +} + // TODO how to abstract this better? type Admin struct { AdminAppDelete AdminAppDelete `skip:"true" mapstructure:",squash"` diff --git a/pkg/exec/ebpf.go b/pkg/exec/ebpf.go new file mode 100644 index 0000000000..0495ae7b07 --- /dev/null +++ b/pkg/exec/ebpf.go @@ -0,0 +1,126 @@ +//go:build ebpfspy + +package exec + +import ( + "context" + "errors" + "fmt" + "github.com/pyroscope-io/pyroscope/pkg/agent/ebpfspy" + sd "github.com/pyroscope-io/pyroscope/pkg/agent/ebpfspy/sd" + "os" + "os/signal" + "syscall" + "time" + + "github.com/pyroscope-io/pyroscope/pkg/agent" + "github.com/pyroscope-io/pyroscope/pkg/agent/spy" + "github.com/pyroscope-io/pyroscope/pkg/agent/types" + "github.com/pyroscope-io/pyroscope/pkg/agent/upstream/remote" + "github.com/pyroscope-io/pyroscope/pkg/config" + "github.com/pyroscope-io/pyroscope/pkg/util/process" +) + +func RunEBPF(cfg *config.EBPF) error { + if cfg.Pid == -1 && cfg.DetectSubprocesses { + return fmt.Errorf("pid -1 can only be used without dectecting subprocesses") + } + if !isRoot() { + return errors.New("when using eBPF you're required to run the agent with sudo") + } + + logger := NewLogger(cfg.LogLevel, cfg.NoLogging) + + rc := remote.RemoteConfig{ + AuthToken: cfg.AuthToken, + UpstreamThreads: cfg.UpstreamThreads, + UpstreamAddress: cfg.ServerAddress, + UpstreamRequestTimeout: cfg.UpstreamRequestTimeout, + } + up, err := remote.New(rc, logger) + if err != nil { + return fmt.Errorf("new remote upstream: %v", err) + } + + // if the sample rate is zero, use the default value + sampleRate := uint32(types.DefaultSampleRate) + if cfg.SampleRate != 0 { + sampleRate = uint32(cfg.SampleRate) + } + + appName := CheckApplicationName(logger, cfg.ApplicationName, spy.EBPF, []string{}) + + var serviceDiscovery sd.ServiceDiscovery = sd.NoopServiceDiscovery{} + if cfg.KubernetesNode != "" { + serviceDiscovery, err = sd.NewK8ServiceDiscovery(context.TODO(), cfg.KubernetesNode) + if err != nil { + return err + } + } + + logger.Debug("starting command") + + // The channel buffer capacity should be sufficient to be keep up with + // the expected signal rate (in case of Exec all the signals to be relayed + // to the child process) + ch := make(chan os.Signal, 10) + signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM) + defer func() { + signal.Stop(ch) + close(ch) + }() + + sc := agent.SessionConfig{ + Upstream: up, + AppName: appName, + Tags: cfg.Tags, + ProfilingTypes: []spy.ProfileType{spy.ProfileCPU}, + SpyName: spy.EBPF, + SampleRate: sampleRate, + UploadRate: 10 * time.Second, + Pid: cfg.Pid, + WithSubprocesses: cfg.DetectSubprocesses, + Logger: logger, + } + session, err := agent.NewSessionWithSpyFactory(sc, func(pid int) ([]spy.Spy, error) { + s := ebpfspy.NewSession(cfg.Pid, sampleRate, cfg.SymbolCacheSize, serviceDiscovery, cfg.OnlyServices) + err := s.Start() + if err != nil { + return nil, err + } + + res := ebpfspy.NewEBPFSpy(s) + return []spy.Spy{res}, nil + }) + if err != nil { + return fmt.Errorf("new session: %w", err) + } + + up.Start() + defer up.Stop() + + if err = session.Start(); err != nil { + return fmt.Errorf("start session: %w", err) + } + defer session.Stop() + + // wait for process to exit + // pid == -1 means we're profiling whole system + if cfg.Pid == -1 { + <-ch + return nil + } + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + for { + select { + case <-ch: + return nil + case <-ticker.C: + if !process.Exists(cfg.Pid) { + logger.Debugf("child process exited") + return nil + } + } + } +} diff --git a/pkg/util/genericlru/generic_lru.go b/pkg/util/genericlru/generic_lru.go new file mode 100644 index 0000000000..16dc5e2e20 --- /dev/null +++ b/pkg/util/genericlru/generic_lru.go @@ -0,0 +1,48 @@ +package genericlru + +import ( + "github.com/hashicorp/golang-lru/simplelru" +) + +type GenericLRU[K any, V any] struct { + lru *simplelru.LRU +} + +type EvictCallback[K any, V any] func(k K, v *V) + +func NewGenericLRU[K any, V any](sz int, evict EvictCallback[K, V]) (*GenericLRU[K, V], error) { + lru, err := simplelru.NewLRU(sz, func(key interface{}, value interface{}) { + evict(key.(K), value.(*V)) + }) + if err != nil { + return nil, err + } + return &GenericLRU[K, V]{lru}, nil +} + +func (l *GenericLRU[K, V]) Get(k K) (*V, bool) { + v, ok := l.lru.Get(k) + if ok { + return v.(*V), ok + } + return nil, ok +} + +func (l *GenericLRU[K, V]) Add(k K, v *V) (evicted bool) { + return l.lru.Add(k, v) +} + +func (l *GenericLRU[K, V]) Remove(k K) (present bool) { + return l.lru.Remove(k) +} + +func (l *GenericLRU[K, V]) Keys() (keys []K) { + for _, key := range l.lru.Keys() { + keys = append(keys, key.(K)) + } + return keys +} + +func (l *GenericLRU[K, V]) Len() int { + return l.lru.Len() +} diff --git a/revive.toml b/revive.toml index 062a5d6a07..9a7c5d9aed 100644 --- a/revive.toml +++ b/revive.toml @@ -66,8 +66,8 @@ warningCode = 0 # Custom rules. We use a revive fork available here: https://github.com/pyroscope-io/revive # See Makefile `lint` section for how to use the fork -[rule.byte-array-limit] - arguments = [7] +#[rule.byte-array-limit] +# arguments = [7] # These are pretty much disabled [rule.cognitive-complexity] diff --git a/scripts/generate-build-flags.sh b/scripts/generate-build-flags.sh index c4b783f89a..48ad53b52b 100755 --- a/scripts/generate-build-flags.sh +++ b/scripts/generate-build-flags.sh @@ -3,7 +3,12 @@ set -e CURRENT_TIME="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" -echo "-X github.com/pyroscope-io/pyroscope/pkg/build.Time=$CURRENT_TIME" +if [ -z $NO_BUILD_TIME_TAG ] +then + echo "-X github.com/pyroscope-io/pyroscope/pkg/build.Time=$CURRENT_TIME" +else + echo "-X github.com/pyroscope-io/pyroscope/pkg/build.Time=NO_BUILD_TIME_TAG" +fi # we don't copy .git to docker context, so in docker context we use git-info if [ -d ".git" ] diff --git a/third_party/bcc/.gitignore b/third_party/bcc/.gitignore new file mode 100644 index 0000000000..d917b5949e --- /dev/null +++ b/third_party/bcc/.gitignore @@ -0,0 +1,2 @@ +lib/ +src/ diff --git a/third_party/bcc/Makefile b/third_party/bcc/Makefile new file mode 100644 index 0000000000..c522b9ad1d --- /dev/null +++ b/third_party/bcc/Makefile @@ -0,0 +1,19 @@ +BCC_VERSION ?= b9554b585afe18540ba98dde5b667e5b4036f479 + +.PHONY: build-bcc +build-bcc: + test -d src || git clone https://github.com/korniltsev/bcc src + cd src && git checkout $(BCC_VERSION) + test -d src/build && rm -rf src/build || echo bcc src/build dir does not exits + mkdir src/build + cd src/build \ + && cmake ../build-syms \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=$(shell pwd)/lib \ + && make -j16 install + +.PHONY: clean +clean: + rm -rf src lib diff --git a/third_party/libbpf/.gitignore b/third_party/libbpf/.gitignore new file mode 100644 index 0000000000..d917b5949e --- /dev/null +++ b/third_party/libbpf/.gitignore @@ -0,0 +1,2 @@ +lib/ +src/ diff --git a/third_party/libbpf/Makefile b/third_party/libbpf/Makefile new file mode 100644 index 0000000000..4de35c7fe5 --- /dev/null +++ b/third_party/libbpf/Makefile @@ -0,0 +1,11 @@ +LIBBPF_VERSION ?= v0.8.1 + +.PHONY: build-libbpf +build-libbpf: + test -d src || git clone https://github.com/libbpf/libbpf src + cd src && git checkout $(LIBBPF_VERSION) + PREFIX=$(shell pwd)/lib make -C src/src -j16 install + +.PHONY: clean +clean: + rm -rf src lib