Skip to content

Commit b2fff2b

Browse files
authored
Fix dotnetspy panic on premature exit and add tests. (#203)
There is a case where dotnetspy cannot establish a connection to the Diagnostics Server before the next snapshot/stop call (e.g., the target process has exited, or the socket file cannot be found). This fix adds a check that the session has actually been created before accessing it. Given that there are no race conditions between reset/stop/snapshot calls, the redundant mutexes were removed.
1 parent 0f526ea commit b2fff2b

File tree

4 files changed

+79
-19
lines changed

4 files changed

+79
-19
lines changed

pkg/agent/dotnetspy/dotnetspy.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,11 @@
33
package dotnetspy
44

55
import (
6-
"sync"
7-
86
"github.com/pyroscope-io/pyroscope/pkg/agent/spy"
97
)
108

119
type DotnetSpy struct {
1210
session *session
13-
m sync.Mutex
1411
reset bool
1512
}
1613

@@ -29,14 +26,10 @@ func (s *DotnetSpy) Stop() error {
2926
}
3027

3128
func (s *DotnetSpy) Reset() {
32-
s.m.Lock()
33-
defer s.m.Unlock()
3429
s.reset = true
3530
}
3631

3732
func (s *DotnetSpy) Snapshot(cb func([]byte, uint64, error)) {
38-
s.m.Lock()
39-
defer s.m.Unlock()
4033
if !s.reset {
4134
return
4235
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// +build dotnetspy
2+
3+
package dotnetspy_test
4+
5+
import (
6+
"testing"
7+
8+
. "github.com/onsi/ginkgo"
9+
. "github.com/onsi/gomega"
10+
)
11+
12+
func TestDotnetSpy(t *testing.T) {
13+
RegisterFailHandler(Fail)
14+
RunSpecs(t, ".NET Spy Suite")
15+
}

pkg/agent/dotnetspy/dotnetspy_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// +build dotnetspy
2+
3+
package dotnetspy
4+
5+
import (
6+
"time"
7+
8+
. "github.com/onsi/ginkgo"
9+
. "github.com/onsi/gomega"
10+
)
11+
12+
var _ = Describe("agent.DotnetSpy", func() {
13+
Describe("Does not panic if a session has not been established", func() {
14+
s := newSession(31337)
15+
s.timeout = time.Millisecond * 10
16+
Expect(s.start()).To(HaveOccurred())
17+
spy := &DotnetSpy{session: s}
18+
19+
It("On Snapshot before Reset", func() {
20+
spy.Snapshot(func(name []byte, samples uint64, err error) {
21+
Fail("Snapshot callback must not be called")
22+
})
23+
})
24+
25+
It("On Snapshot after Reset", func() {
26+
spy.Reset()
27+
spy.Snapshot(func(name []byte, samples uint64, err error) {
28+
Fail("Snapshot callback must not be called")
29+
})
30+
})
31+
32+
It("On Stop", func() {
33+
Expect(spy.Stop()).ToNot(HaveOccurred())
34+
})
35+
})
36+
})

pkg/agent/dotnetspy/session.go

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ package dotnetspy
55
import (
66
"context"
77
"io"
8-
"sync"
98
"time"
109

1110
"github.com/pyroscope-io/dotnetdiag"
@@ -14,13 +13,13 @@ import (
1413
)
1514

1615
type session struct {
17-
pid int
16+
pid int
17+
timeout time.Duration
1818

1919
config dotnetdiag.CollectTracingConfig
2020
session *dotnetdiag.Session
2121

2222
ch chan line
23-
m sync.Mutex
2423
stopped bool
2524
}
2625

@@ -31,7 +30,8 @@ type line struct {
3130

3231
func newSession(pid int) *session {
3332
return &session{
34-
pid: pid,
33+
pid: pid,
34+
timeout: 3 * time.Second,
3535
config: dotnetdiag.CollectTracingConfig{
3636
CircularBufferSizeMB: 100,
3737
Providers: []dotnetdiag.ProviderConfig{
@@ -45,10 +45,13 @@ func newSession(pid int) *session {
4545
}
4646
}
4747

48+
// start opens a new diagnostic session to the process given, and asynchronously
49+
// processes the event stream.
4850
func (s *session) start() error {
49-
ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
51+
ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
5052
defer cancel()
51-
53+
// If the process does not create Diagnostic Server, the next call will
54+
// fail, and a session won't be created.
5255
client := dotnetdiag.NewClient(waitDiagnosticServer(ctx, s.pid))
5356
ns, err := client.CollectTracing(s.config)
5457
if err != nil {
@@ -78,6 +81,8 @@ func (s *session) start() error {
7881
case nil:
7982
continue
8083
case io.EOF:
84+
// The session is closed by us (on flush or stop call),
85+
// or the target process has exited.
8186
for k, v := range p.Samples() {
8287
s.ch <- line{
8388
name: []byte(k),
@@ -92,30 +97,41 @@ func (s *session) start() error {
9297
return nil
9398
}
9499

100+
// flush closes NetTrace stream in order to retrieve samples,
101+
// and starts a new session, if not in stopped state.
95102
func (s *session) flush(cb func([]byte, uint64)) error {
103+
// Ignore call, if NetTrace session has not been established.
104+
if s.session == nil {
105+
return nil
106+
}
96107
_ = s.session.Close()
97108
for v := range s.ch {
98109
cb(v.name, uint64(v.val))
99110
}
100-
s.m.Lock()
101-
defer s.m.Unlock()
102111
if s.stopped {
103112
return nil
104113
}
105114
return s.start()
106115
}
107116

117+
// stop closes diagnostic session, if it was established, and sets the
118+
// flag preventing session to start again.
108119
func (s *session) stop() error {
109-
s.m.Lock()
110-
defer s.m.Unlock()
111-
_ = s.session.Close()
120+
if s.session != nil {
121+
_ = s.session.Close()
122+
}
112123
s.stopped = true
113124
return nil
114125
}
115126

116127
// .Net runtime requires some time to initialize diagnostic IPC server and
117-
// start accepting connections.
128+
// start accepting connections. If it fails before context cancel, an empty
129+
// string will be returned.
118130
func waitDiagnosticServer(ctx context.Context, pid int) string {
131+
// Do not wait for the timer to fire for the first time.
132+
if addr := dotnetdiag.DefaultServerAddress(pid); addr != "" {
133+
return addr
134+
}
119135
ticker := time.NewTicker(time.Millisecond * 100)
120136
defer ticker.Stop()
121137
for {

0 commit comments

Comments
 (0)