From 28e47cf9fa3c7e4f57b7fd8f3b16b4dded55b88b Mon Sep 17 00:00:00 2001 From: Yakau Bubnou Date: Thu, 8 Dec 2016 19:48:15 +0300 Subject: [PATCH 1/2] Prevent panic errors in tombstone GC This patch fixes the race condition caused by concurrent call to the "SetEnabled" method and call of the timer handler. If the "SetEnabled" will acquire the lock earlier, it will erase the content of the "expires" map. So when the timer handler will acquire the lock, it will access to the non-existing interval (which is evaluated to nil), thus cause the panic. I added a check, that ensures the tombstone GC is enabled before sending anything to expire channel. --- consul/state/tombstone_gc.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/consul/state/tombstone_gc.go b/consul/state/tombstone_gc.go index 0d530eb696f5..d60ad9f2f216 100644 --- a/consul/state/tombstone_gc.go +++ b/consul/state/tombstone_gc.go @@ -142,9 +142,14 @@ func (t *TombstoneGC) expireTime(expires time.Time) { // Get the maximum index and clear the entry t.lock.Lock() exp := t.expires[expires] + enabled := t.enabled delete(t.expires, expires) t.lock.Unlock() + if !enabled { + return + } + // Notify the expires channel t.expireCh <- exp.maxIndex } From 0562871ff064a183208934935b58a1d230ecb2dd Mon Sep 17 00:00:00 2001 From: Yakau Bubnou Date: Fri, 9 Dec 2016 14:27:39 +0300 Subject: [PATCH 2/2] Create a global port reservation mapping This patch create a global reservation mapping used to remember the ports that have been already allocated for the consul agents. Also to release the reservations when the server will be stopped it ships a new function "releasePorts" that removes the ports from the mapping. --- testutil/server.go | 55 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/testutil/server.go b/testutil/server.go index 8ab196eca2e8..ef76cb2bf52d 100644 --- a/testutil/server.go +++ b/testutil/server.go @@ -23,6 +23,7 @@ import ( "os" "os/exec" "strings" + "sync" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/go-cleanhttp" @@ -100,14 +101,57 @@ func defaultServerConfig() *TestServerConfig { } } +// Ports allocation state. +// +// We let the kernel choose random port number, so there's a good +// chance the port is unused. But kernel could allocate the same +// port twice, if agent did not have time to start listening on it. +// +// There is no obvious way to defend from the other processes in +// the system, which will use OS-assigned ports, but at least it +// will reduce duplicated ports shared by the agents. +var nmap = make(map[int]struct{}) +var nmapmu sync.Mutex + // randomPort asks the kernel for a random port to use. func randomPort() int { - l, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - panic(err) + makeport := func() (int, bool) { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + panic(err) + } + + defer l.Close() + port := l.Addr().(*net.TCPAddr).Port + + nmapmu.Lock() + defer nmapmu.Unlock() + + _, dup := nmap[port] + nmap[port] = struct{}{} + return port, dup + } + + for i := 0; i < 100; i++ { + if port, dup := makeport(); !dup { + return port + } } - defer l.Close() - return l.Addr().(*net.TCPAddr).Port + + panic(fmt.Errorf("no usable ports")) +} + +// releasePorts removes the ports reserved by test server. +func releasePorts(c *TestPortConfig) { + nmapmu.Lock() + defer nmapmu.Unlock() + + delete(nmap, c.DNS) + delete(nmap, c.HTTP) + delete(nmap, c.RPC) + delete(nmap, c.SerfLan) + delete(nmap, c.SerfWan) + delete(nmap, c.Server) } // TestService is used to serialize a service definition. @@ -257,6 +301,7 @@ func NewTestServerConfig(t TestingT, cb ServerConfigCallback) *TestServer { // directory once we are done. func (s *TestServer) Stop() { defer os.RemoveAll(s.Config.DataDir) + defer releasePorts(s.Config.Ports) if err := s.cmd.Process.Kill(); err != nil { s.t.Errorf("err: %s", err)