fix: don't panic when logs waits for more than 5 seconds (v2) #1278

Closed · wants to merge 5 commits

Changes from 2 commits
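The core of the change: the log-producer goroutine used to be stopped through a dedicated stop channel, and each ContainerLogs request carried a hard 5-second timeout whose expiry ended in panic(err). The PR drops the per-request timeout and swaps the channel for context cancellation. A minimal, self-contained sketch of that pattern (the names here are illustrative, not testcontainers-go API):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// producer stands in for the log-pumping goroutine: it runs until its
// context is canceled instead of listening on a bespoke stop channel.
func producer(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			fmt.Println("producer: context canceled, exiting")
			return
		case <-time.After(200 * time.Millisecond):
			fmt.Println("producer: pumping logs")
		}
	}
}

func main() {
	ctx, stop := context.WithCancel(context.Background())
	go producer(ctx)

	time.Sleep(500 * time.Millisecond)
	stop() // the moral equivalent of StopLogProducer: never blocks, safe to repeat
	time.Sleep(100 * time.Millisecond)
}
```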
docker.go — 27 changes: 15 additions & 12 deletions
@@ -69,7 +69,7 @@ type DockerContainer struct {
 	terminationSignal chan bool
 	consumers         []LogConsumer
 	raw               *types.ContainerJSON
-	stopProducer      chan bool
+	stopProducer      context.CancelFunc
 	logger            Logging
 	lifecycleHooks    []ContainerLifecycleHooks
 }
@@ -632,9 +632,9 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 		return errors.New("log producer already started")
 	}

-	c.stopProducer = make(chan bool)
+	ctx, c.stopProducer = context.WithCancel(ctx)

-	go func(stop <-chan bool) {
+	go func() {
 		since := ""
 		// if the socket is closed we will make additional logs request with updated Since timestamp
 	BEGIN:
@@ -645,20 +645,22 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 			Since: since,
 		}

-		ctx, cancel := context.WithTimeout(ctx, time.Second*5)
-		defer cancel()
-
 		r, err := c.provider.client.ContainerLogs(ctx, c.GetContainerID(), options)
 		if err != nil {
-			// if we can't get the logs, panic, we can't return an error to anything
-			// from within this goroutine
-			panic(err)
+			// if we can't get the logs, retry in one second.
+			c.logger.Printf("cannot get logs for container %q: %v", c.ID, err)
+			if ctx.Err() != nil {
+				// context done.
+				return
+			}
+			time.Sleep(1 * time.Second)
+			goto BEGIN
 		}
 		defer c.provider.Close()

 		for {
 			select {
-			case <-stop:
+			case <-ctx.Done():
 				err := r.Close()
 				if err != nil {
 					// we can't close the read closer, this should never happen
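The hunk above is the heart of the fix: instead of panicking when ContainerLogs fails (which a mere 5-second wait could trigger under the old per-request timeout), the goroutine logs the error, backs off for a second, and retries via goto BEGIN, bailing out only once its context is done. The same control flow expressed as a plain loop, a hedged sketch with an invented fetch placeholder:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// retryLoop mirrors the goroutine's error path without the goto label.
// fetch stands in for the ContainerLogs call; it is a placeholder, not the real API.
func retryLoop(ctx context.Context, fetch func(context.Context) error) {
	for {
		err := fetch(ctx)
		if err == nil {
			return // stream obtained; the real code goes on to read from it
		}
		if ctx.Err() != nil {
			return // context done: stop retrying instead of looping forever
		}
		fmt.Printf("cannot get logs: %v (retrying)\n", err)
		time.Sleep(1 * time.Second) // brief back-off before re-requesting
	}
}

func main() {
	attempts := 0
	retryLoop(context.Background(), func(context.Context) error {
		attempts++
		if attempts < 3 {
			return errors.New("daemon not ready")
		}
		return nil
	})
	fmt.Println("connected after", attempts, "attempts")
}
```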
@@ -719,7 +721,7 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 				}
 			}
 		}
-	}(c.stopProducer)
+	}()

 	return nil
 }
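One subtle detail in this hunk: the old code passed c.stopProducer into the goroutine as an argument so it kept watching the exact channel it was started with, even after StopLogProducer sets the field to nil. The new code gets the same guarantee by closing over the local ctx variable, which nothing reassigns after the goroutine starts. A tiny sketch of why the capture is safe (illustrative names, not PR code):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())

	// The goroutine captures ctx itself, not a struct field, so later
	// mutations such as `c.stopProducer = nil` cannot change what it watches.
	go func() {
		<-ctx.Done()
		fmt.Println("worker stopped:", ctx.Err())
	}()

	cancel()
	time.Sleep(50 * time.Millisecond) // give the goroutine a moment to print
}
```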
@@ -728,7 +730,8 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 // and sending them to each added LogConsumer
 func (c *DockerContainer) StopLogProducer() error {
 	if c.stopProducer != nil {
-		c.stopProducer <- true
+		// Cancel the producer's context.
+		c.stopProducer()
 		c.stopProducer = nil
 	}
 	return nil
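Worth noting about the StopLogProducer change: a send on the old unbuffered stopProducer channel blocks forever if the goroutine has already exited, exactly the state the old panic path could leave behind, while a context.CancelFunc never blocks and may be called any number of times. A small demonstration of the cancel side (not PR code):

```go
package main

import (
	"context"
	"fmt"
)

func main() {
	ctx, stop := context.WithCancel(context.Background())

	stop() // first call cancels the context
	stop() // extra calls are harmless no-ops: no panic, no deadlock

	fmt.Println(ctx.Err()) // context.Canceled
}
```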
logconsumer_test.go — 97 changes: 73 additions & 24 deletions
@@ -7,6 +7,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"sync"
 	"testing"
 	"time"
@@ -20,8 +21,18 @@ import (
 const lastMessage = "DONE"

 type TestLogConsumer struct {
-	Msgs []string
-	Ack  chan bool
+	mu         sync.Mutex
+	msgs       []string
+	Ack        chan bool
+	waitingFor string
+	ackWait    chan bool
+}
+
+func NewTestLogConsumer() *TestLogConsumer {
+	return &TestLogConsumer{
+		msgs: []string{},
+		Ack:  make(chan bool),
+	}
 }

 func (g *TestLogConsumer) Accept(l Log) {
@@ -31,7 +42,41 @@ func (g *TestLogConsumer) Accept(l Log) {
 		return
 	}

-	g.Msgs = append(g.Msgs, s)
+	g.mu.Lock()
Member review comment on the g.mu.Lock() line:
Could we add a comment here explaining why the synchronisation must be done at the test level? You already added it in #947 :)

+	defer g.mu.Unlock()
+	g.msgs = append(g.msgs, s)
+	if g.waitingFor != "" && s == fmt.Sprintf("echo %s\n", g.waitingFor) {
+		close(g.ackWait)
+		g.waitingFor = ""
+	}
 }
+
+// WaitFor waits for s to appear in the output.
+// It returns an error if another wait is already in progress or the context is canceled.
+func (g *TestLogConsumer) WaitFor(ctx context.Context, s string) error {
+	g.mu.Lock()
+	if g.waitingFor != "" {
+		g.mu.Unlock()
+		return fmt.Errorf("already waiting")
+	}
+	g.waitingFor = s
+	g.ackWait = make(chan bool)
+	g.mu.Unlock()
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-g.ackWait:
+		return nil
+	}
+}
+
+// Msgs returns messages received so far.
+// The caller must not modify the contents of the slice.
+func (g *TestLogConsumer) Msgs() []string {
+	g.mu.Lock()
+	v := g.msgs[0:len(g.msgs):len(g.msgs)]
+	g.mu.Unlock()
+	return v
+}

 func Test_LogConsumerGetsCalled(t *testing.T) {
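The three-index slice in Msgs is easy to read past: g.msgs[0:len(g.msgs):len(g.msgs)] returns a snapshot whose capacity equals its length, so if a caller appends to the returned slice, Go must allocate a fresh backing array rather than write into the array the consumer goroutine is still appending to. A standalone illustration of the difference (not from the PR):

```go
package main

import "fmt"

func main() {
	backing := make([]string, 3, 8) // spare capacity, like a growing msgs buffer
	copy(backing, []string{"a", "b", "c"})

	unsafeView := backing[0:3] // cap 8: an append writes into backing's array
	safeView := backing[0:3:3] // cap 3: an append must reallocate

	_ = append(unsafeView, "x") // silently stores "x" at backing[3]
	_ = append(safeView, "y")   // copies to a new array; backing is untouched

	fmt.Println(backing[0:4]) // [a b c x]: only the uncapped append leaked through
}
```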
@@ -56,12 +101,9 @@ func Test_LogConsumerGetsCalled(t *testing.T) {
 	ep, err := c.Endpoint(ctx, "http")
 	require.NoError(t, err)

-	g := TestLogConsumer{
-		Msgs: []string{},
-		Ack:  make(chan bool),
-	}
+	g := NewTestLogConsumer()

-	c.FollowOutput(&g)
+	c.FollowOutput(g)

 	err = c.StartLogProducer(ctx)
 	require.NoError(t, err)
@@ -81,7 +123,7 @@ func Test_LogConsumerGetsCalled(t *testing.T) {
 		t.Fatal("never received final log message")
 	}
 	assert.Nil(t, c.StopLogProducer())
-	assert.Equal(t, []string{"ready\n", "echo hello\n", "echo there\n"}, g.Msgs)
+	assert.Equal(t, []string{"ready\n", "echo hello\n", "echo there\n"}, g.Msgs())

 	terminateContainerOnEnd(t, ctx, c)
 }
@@ -173,11 +215,11 @@ func Test_MultipleLogConsumers(t *testing.T) {
 	ep, err := c.Endpoint(ctx, "http")
 	require.NoError(t, err)

-	first := TestLogConsumer{Msgs: []string{}, Ack: make(chan bool)}
-	second := TestLogConsumer{Msgs: []string{}, Ack: make(chan bool)}
+	first := NewTestLogConsumer()
+	second := NewTestLogConsumer()

-	c.FollowOutput(&first)
-	c.FollowOutput(&second)
+	c.FollowOutput(first)
+	c.FollowOutput(second)

 	err = c.StartLogProducer(ctx)
 	require.NoError(t, err)
@@ -192,8 +234,8 @@ func Test_MultipleLogConsumers(t *testing.T) {
 	<-second.Ack
 	assert.Nil(t, c.StopLogProducer())

-	assert.Equal(t, []string{"ready\n", "echo mlem\n"}, first.Msgs)
-	assert.Equal(t, []string{"ready\n", "echo mlem\n"}, second.Msgs)
+	assert.Equal(t, []string{"ready\n", "echo mlem\n"}, first.Msgs())
+	assert.Equal(t, []string{"ready\n", "echo mlem\n"}, second.Msgs())
 	assert.Nil(t, c.Terminate(ctx))
 }

@@ -219,9 +261,9 @@ func Test_StartStop(t *testing.T) {
 	ep, err := c.Endpoint(ctx, "http")
 	require.NoError(t, err)

-	g := TestLogConsumer{Msgs: []string{}, Ack: make(chan bool)}
+	g := NewTestLogConsumer()

-	c.FollowOutput(&g)
+	c.FollowOutput(g)

 	require.NoError(t, c.StopLogProducer(), "nothing should happen even if the producer is not started")
 	require.NoError(t, c.StartLogProducer(ctx))
@@ -230,6 +272,11 @@ func Test_StartStop(t *testing.T) {
 	_, err = http.Get(ep + "/stdout?echo=mlem")
 	require.NoError(t, err)

+	waitCtx, cancelWait := context.WithTimeout(ctx, 5*time.Second)
+	err = g.WaitFor(waitCtx, "mlem")
+	cancelWait()
+	require.NoError(t, err)
+
 	require.NoError(t, c.StopLogProducer())
 	require.NoError(t, c.StartLogProducer(ctx))

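This is the behavioural test change that motivates WaitFor: without it, StopLogProducer could win the race against delivery of the "mlem" line, and the final assertion would see a truncated message list. Note that WaitFor matches the "echo <s>" line produced by the test's HTTP server, so it is a rendezvous on a specific marker, not a generic substring search. A hedged sketch of reusing it elsewhere in this test package (the helper below is invented, not part of the PR):

```go
// waitForEcho bounds the wait with a deadline so a missing log line fails
// the test instead of hanging it. g and the running container are assumed
// to be wired together with c.FollowOutput(g) and StartLogProducer.
func waitForEcho(t *testing.T, g *TestLogConsumer, s string) {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := g.WaitFor(ctx, s); err != nil {
		t.Fatalf("%q never appeared in the logs: %v", s, err)
	}
}
```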
@@ -248,7 +295,7 @@ func Test_StartStop(t *testing.T) {
 		"ready\n",
 		"echo mlem\n",
 		"echo mlem2\n",
-	}, g.Msgs)
+	}, g.Msgs())
 	assert.Nil(t, c.Terminate(ctx))
 }

@@ -339,7 +386,7 @@ func TestContainerLogWithErrClosed(t *testing.T) {

 	// Gather the initial container logs
 	time.Sleep(time.Second * 1)
-	existingLogs := len(consumer.Msgs)
+	existingLogs := len(consumer.Msgs())

 	hitNginx := func() {
 		i, _, err := dind.Exec(ctx, []string{"wget", "--spider", "localhost:" + port.Port()})
@@ -350,10 +397,11 @@ func TestContainerLogWithErrClosed(t *testing.T) {

 	hitNginx()
 	time.Sleep(time.Second * 1)
-	if len(consumer.Msgs)-existingLogs != 1 {
-		t.Fatalf("logConsumer should have 1 new log message, instead has: %v", consumer.Msgs[existingLogs:])
+	logs2 := consumer.Msgs()
+	if len(logs2)-existingLogs != 1 {
+		t.Fatalf("logConsumer should have 1 new log message, instead has: %v", logs2[existingLogs:])
 	}
-	existingLogs = len(consumer.Msgs)
+	existingLogs = len(consumer.Msgs())

 	iptableArgs := []string{
 		"INPUT", "-p", "tcp", "--dport", "2375",
@@ -373,10 +421,11 @@ func TestContainerLogWithErrClosed(t *testing.T) {
 	hitNginx()
 	hitNginx()
 	time.Sleep(time.Second * 1)
-	if len(consumer.Msgs)-existingLogs != 2 {
+	logs3 := consumer.Msgs()
+	if len(logs3)-existingLogs != 2 {
 		t.Fatalf(
 			"LogConsumer should have 2 new log messages after detecting closed connection and"+
-				" re-requesting logs. Instead has:\n%s", consumer.Msgs[existingLogs:],
+				" re-requesting logs. Instead has:\n%s", logs3[existingLogs:],
 		)
 	}
 }