Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Heal segfaults on slow expired endpoints #534

Closed
xzfc opened this issue Oct 16, 2020 · 5 comments
Closed

Heal segfaults on slow expired endpoints #534

xzfc opened this issue Oct 16, 2020 · 5 comments
Labels
bug Something isn't working

Comments

@xzfc
Copy link
Contributor

xzfc commented Oct 16, 2020

Test case: https://github.com/xzfc/networkservicemesh-sdk/commit/bfe6f167587c43ad88848d3ee76e0559605192e4

$ go test -v -run TestNSMGR_Slow
=== RUN   TestNSMGR_Slow
INFO[0000] ExcludedPrefixesService: adding excluded prefixes to connection: []
ERRO[0000] An error during watch file: no such file or directory
INFO[0000] ExcludedPrefixesService: adding excluded prefixes to connection: []
INFO[0001] ExcludedPrefixesService: adding excluded prefixes to connection: []
INFO[0001] ExcludedPrefixesService: adding excluded prefixes to connection: []
INFO[0001] ExcludedPrefixesService: adding excluded prefixes to connection: []
INFO[0001] ExcludedPrefixesService: adding excluded prefixes to connection: []
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0xa1451c]

goroutine 145 [running]:
github.com/networkservicemesh/sdk/pkg/networkservice/common/heal.(*healClient).processEvent(0xc000335c70, 0xd068a0, 0xc0003445c0, 0xc0005ca380, 0xc0003b8fc0, 0x0, 0x0, 0x0, 0x0, 0x0)
	/go/github.com/networkservicemesh/sdk/pkg/networkservice/common/heal/client.go:260 +0x26c
github.com/networkservicemesh/sdk/pkg/networkservice/common/heal.(*healClient).healAsNeeded(0xc000335c70, 0xd068a0, 0xc0003445c0, 0xc0005ca380, 0xc000113620, 0x0, 0x0, 0x0)
	/go/github.com/networkservicemesh/sdk/pkg/networkservice/common/heal/client.go:195 +0x6d1
created by github.com/networkservicemesh/sdk/pkg/networkservice/common/heal.(*healClient).startHeal
	/go/github.com/networkservicemesh/sdk/pkg/networkservice/common/heal/client.go:119 +0x1bc
exit status 2
FAIL	github.com/networkservicemesh/sdk/pkg/networkservice/chains/nsmgr	1.124s

Another segfault is on line 100 (the reproduction code is messy at the moment):

github.com/networkservicemesh/sdk/pkg/networkservice/common/heal.(*healClient).stopHeal(0xc000231b80, 0xc00046c000)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/heal/client.go:100 +0xb1
github.com/networkservicemesh/sdk/pkg/networkservice/common/heal.(*healClient).Close(0xc000231b80, 0xd11c20, 0xc000296990, 0xc00046c000, 0x0, 0x0, 0x0, 0xc000296990, 0x7f60e9ca47d0, 0xc0003d9800)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/heal/client.go:87 +0x39
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceClient).Close(0xc00053b510, 0xd11c20, 0xc000296630, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/client.go:78 +0x253
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextClient).Close(0xc000500630, 0xd11c20, 0xc0002963c0, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0xae72e0, 0xc0001f8650)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/client.go:82 +0x293
github.com/networkservicemesh/sdk/pkg/networkservice/common/updatepath.(*updatePathClient).Close(0xc00053b4c0, 0xd11c20, 0xc0002963c0, 0xc00046c000, 0x0, 0x0, 0x0, 0xc0002963c0, 0x7f60e9ca62f0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/updatepath/client.go:59 +0xf5
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceClient).Close(0xc00053b500, 0xd11c20, 0xc000500660, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/client.go:78 +0x253
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextClient).Close(0xc000500090, 0xd11c20, 0xc0005004b0, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0xae72e0, 0xc000118430)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/client.go:82 +0x293
github.com/networkservicemesh/sdk/pkg/networkservice/common/authorize.(*authorizeClient).Close(0xc0005380d8, 0xd11c20, 0xc0005004b0, 0xc00046c000, 0x0, 0x0, 0x0, 0xc0005004b0, 0x7f60e9ca62f0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/authorize/client.go:58 +0xf6
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceClient).Close(0xc00053b4f0, 0xd11c20, 0xc000500180, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/client.go:78 +0x253
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextClient).Close(0xc0001c39e0, 0xd11c20, 0xc0004864e0, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0xae72e0, 0xc00053a3a0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/client.go:82 +0x293
github.com/networkservicemesh/sdk/pkg/networkservice/common/clienturl.(*clientURLClient).Close(0xc000513c80, 0xd11c20, 0xc0004864e0, 0xc00046c000, 0x0, 0x0, 0x0, 0xc0004864e0, 0x7f60e9ca7080, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/clienturl/client.go:65 +0xaa
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceClient).Close(0xc00053aa50, 0xd11c20, 0xc000486180, 0xc00046c000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/client.go:78 +0x253
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextClient).Close(0xc000297980, 0xd11c20, 0xc000297d10, 0xc00046c000, 0x0, 0x0, 0x0, 0xc0001198d0, 0xbd0b40, 0x1271fd0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/client.go:84 +0x123
github.com/networkservicemesh/sdk/pkg/networkservice/common/connect.(*connectClient).Close(0xc0006dd520, 0xd11c20, 0xc000297d10, 0xc000548ea0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/connect/client.go:60 +0x157
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceClient).Close(0xc00053aa40, 0xd11c20, 0xc0002979b0, 0xc000548ea0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/client.go:78 +0x253
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextClient).Close(0xc0001c2b10, 0xd11c20, 0xc0002978c0, 0xc000548ea0, 0x0, 0x0, 0x0, 0xc084bc, 0x7, 0xa)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/client.go:82 +0x293
github.com/networkservicemesh/sdk/pkg/networkservice/common/connect.(*connectServer).Close(0xc0001a3780, 0xd11c20, 0xc0002978c0, 0xc000548ea0, 0xc000548ea0, 0xc000154a80, 0xd11c20)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/connect/server.go:81 +0xaf
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceServer).Close(0xc0002fdbc0, 0xd11c20, 0xc000297650, 0xc000548ea0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/server.go:77 +0x226
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextServer).Close(0xc000297350, 0xd11c20, 0xc000297620, 0xc000548ea0, 0xbc0200, 0xc0001a3580, 0xd11c20)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/server.go:84 +0xf3
github.com/networkservicemesh/sdk/pkg/networkservice/common/clienturl.(*clientURLServer).Close(0xc000010908, 0xd11c20, 0xc000297560, 0xc000548ea0, 0xc000548ea0, 0xc000154690, 0xd11c20)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/clienturl/server.go:48 +0xc7
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceServer).Close(0xc0002fdbb0, 0xd11c20, 0xc000297380, 0xc000548ea0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/server.go:77 +0x226
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextServer).Close(0xc000297080, 0xd11c20, 0xc000297290, 0xc000548ea0, 0x0, 0x0, 0xc0001190f0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/server.go:82 +0x248
github.com/networkservicemesh/sdk/pkg/networkservice/common/updatetoken.(*updateTokenServer).Close(0xc000010920, 0xd11c20, 0xc000297290, 0xc000548ea0, 0xc000548ea0, 0xc000154310, 0xd11c20)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/updatetoken/server.go:58 +0xc6
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceServer).Close(0xc0002fdba0, 0xd11c20, 0xc0002970b0, 0xc000548ea0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/server.go:77 +0x226
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextServer).Close(0xc000296d20, 0xd11c20, 0xc000296fc0, 0xc000548ea0, 0x0, 0xae72e0, 0xc000118eb0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/server.go:82 +0x248
github.com/networkservicemesh/sdk/pkg/networkservice/common/timeout.(*timeoutServer).Close(0xc000487290, 0xd11c20, 0xc000296fc0, 0xc000548ea0, 0xc000548ea0, 0xc00051be30, 0xd11c20)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/timeout/server.go:83 +0xbd
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceServer).Close(0xc0002fdb90, 0xd11c20, 0xc000296d50, 0xc000548ea0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/server.go:77 +0x226
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextServer).Close(0xc000296840, 0xd11c20, 0xc000296b70, 0xc000548ea0, 0xa, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/server.go:82 +0x248
github.com/networkservicemesh/sdk/pkg/networkservice/common/monitor.(*monitorServer).Close(0xc0000ab680, 0xd11c20, 0xc000296b70, 0xc000548ea0, 0xc000548ea0, 0xc00051b810, 0xd11c20)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/common/monitor/server.go:98 +0xa4
github.com/networkservicemesh/sdk/pkg/networkservice/core/trace.(*traceServer).Close(0xc0002fdb80, 0xd11c20, 0xc000296870, 0xc000548ea0, 0x0, 0x0, 0x0)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/trace/server.go:77 +0x226
github.com/networkservicemesh/sdk/pkg/networkservice/core/next.(*nextServer).Close(0xc000296480, 0xd11c20, 0xc000296750, 0xc000548ea0, 0x0, 0x0, 0xc000118680)
        /go/github.com/networkservicemesh/sdk/pkg/networkservice/core/next/server.go:82 +0x248
@xzfc xzfc added the bug Something isn't working label Oct 16, 2020
@edwarnicke
Copy link
Member

Good catch!

@edwarnicke
Copy link
Member

github.com/networkservicemesh/sdk/pkg/networkservice/common/heal.(*healClient).processEvent(0xc000335c70, 0xd068a0, 0xc0003445c0, 0xc0005ca380, 0xc0003b8fc0, 0x0, 0x0, 0x0, 0x0, 0x0)
/go/github.com/networkservicemesh/sdk/pkg/networkservice/common/heal/client.go:260 +0x26c

That's a very weird place to hit a panic of that sort, as thats:

_, err := (*f.onHeal).Request(ctx, request, opts...)

And in NewHeal its set to a non-nil value:

rv := &healClient{
		ctx:           ctx,
		client:        client,
		onHeal:        onHeal,
		cancelHealMap: make(map[string]func() <-chan error),
	}

	if rv.onHeal == nil {
		rv.onHeal = addressof.NetworkServiceClient(rv)
	}

	return rv

and that's the only place its written to...

@xzfc
Copy link
Contributor Author

xzfc commented Oct 16, 2020

rv.onHeal isn't nil, but *rv.onHeal is.

  rv := &healClient{
		ctx:           ctx,
		client:        client,
		onHeal:        onHeal,
		cancelHealMap: make(map[string]func() <-chan error),
	}

	if rv.onHeal == nil {
		rv.onHeal = addressof.NetworkServiceClient(rv)
	}
	if *rv.onHeal == nil {
		fmt.Println("*rv.onHeal == nil")
	}

	return rv

@edwarnicke
Copy link
Member

Hmm... that's now what I see. When I run this test:

func TestOnHeal(t *testing.T) {
	heal.NewClient(context.Background(),nil,nil)
}

against this code:

func NewClient(ctx context.Context, client networkservice.MonitorConnectionClient, onHeal *networkservice.NetworkServiceClient) networkservice.NetworkServiceClient {
	rv := &healClient{
		ctx:           ctx,
		client:        client,
		onHeal:        onHeal,
		cancelHealMap: make(map[string]func() <-chan error),
	}

	if rv.onHeal == nil {
		rv.onHeal = addressof.NetworkServiceClient(rv)
	}
	foo := *rv.onHeal
	if foo == nil {
		panic("*(rv.onHeal) == nil")
	}

	return rv
}

It passes, and I also can walk through with a debugger and see:
image

@haiodo
Copy link
Contributor

haiodo commented Dec 4, 2020

It is looks fixed.

@haiodo haiodo closed this as completed Dec 4, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

3 participants