Keep docker & k8s pod annotations while they are needed #5084
@@ -1,15 +1,17 @@
 package add_docker_metadata
 
 import (
+	"context"
 	"fmt"
 	"net/http"
+	"sync"
+	"time"
 
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/events"
 	"github.com/docker/docker/api/types/filters"
 	"github.com/docker/docker/client"
 	"github.com/docker/go-connections/tlsconfig"
-	"golang.org/x/net/context"
 
 	"github.com/elastic/beats/libbeat/logp"
 )
@@ -22,6 +24,9 @@ type Watcher interface {
 	// Start watching docker API for new containers
 	Start() error
 
+	// Stop watching docker API for new containers
+	Stop()
+
 	// Container returns the running container with the given ID or nil if unknown
 	Container(ID string) *Container
@@ -30,11 +35,15 @@ type Watcher interface {
 }
 
 type watcher struct {
-	client             *client.Client
+	sync.RWMutex
+	client             Client
 	ctx                context.Context
 	stop               context.CancelFunc
 	containers         map[string]*Container
+	deleted            map[string]time.Time // deleted annotations key -> last access time
+	cleanupTimeout     time.Duration
 	lastValidTimestamp int64
+	stopped            sync.WaitGroup
 }
 
 // Container info retrieved by the watcher
@@ -45,6 +54,12 @@ type Container struct {
 	Labels map[string]string
 }
 
+// Client for docker interface
+type Client interface {
+	ContainerList(ctx context.Context, options types.ContainerListOptions) ([]types.Container, error)
+	Events(ctx context.Context, options types.EventsOptions) (<-chan events.Message, <-chan error)
+}
+
 type WatcherConstructor func(host string, tls *TLSConfig) (Watcher, error)
 
 // NewWatcher returns a watcher running for the given settings
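The extracted Client interface is what makes the watcher testable without a live Docker daemon: anything implementing these two methods can be handed to the NewWatcherWithClient constructor introduced below. A minimal stub sketch, assuming test code in the same package (the mockClient name and fields are illustrative, not part of this diff):

// mockClient is a hypothetical stub implementing the Client interface above.
type mockClient struct {
	containers []types.Container   // what ContainerList returns
	events     chan events.Message // events pushed by the test
	errors     chan error          // errors pushed by the test
}

func (m *mockClient) ContainerList(ctx context.Context, options types.ContainerListOptions) ([]types.Container, error) {
	return m.containers, nil
}

func (m *mockClient) Events(ctx context.Context, options types.EventsOptions) (<-chan events.Message, <-chan error) {
	return m.events, m.errors
}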
@@ -69,28 +84,51 @@ func NewWatcher(host string, tls *TLSConfig) (Watcher, error) {
 		}
 	}
 
-	cli, err := client.NewClient(host, dockerAPIVersion, httpClient, nil)
+	client, err := client.NewClient(host, dockerAPIVersion, httpClient, nil)
 	if err != nil {
 		return nil, err
 	}
 
+	return NewWatcherWithClient(client, 60*time.Second)
+}
+
+func NewWatcherWithClient(client Client, cleanupTimeout time.Duration) (*watcher, error) {
 	ctx, cancel := context.WithCancel(context.Background())
 	return &watcher{
-		client:     cli,
-		ctx:        ctx,
-		stop:       cancel,
-		containers: make(map[string]*Container),
+		client:         client,
+		ctx:            ctx,
+		stop:           cancel,
+		containers:     make(map[string]*Container),
+		deleted:        make(map[string]time.Time),
+		cleanupTimeout: cleanupTimeout,
 	}, nil
 }
 
 // Container returns the running container with the given ID or nil if unknown
 func (w *watcher) Container(ID string) *Container {
-	return w.containers[ID]
+	w.RLock()
+	container := w.containers[ID]
+	w.RUnlock()
+
+	// Update last access time if it's deleted
+	if _, ok := w.deleted[ID]; ok {
+		w.Lock()
+		w.deleted[ID] = time.Now()
+		w.Unlock()
+	}
+
+	return container
 }
 
 // Containers returns the list of known containers
 func (w *watcher) Containers() map[string]*Container {
-	return w.containers
+	w.RLock()
+	defer w.RUnlock()
+	res := make(map[string]*Container)
+	for k, v := range w.containers {
+		res[k] = v
+	}
+	return res
 }
 
 // Start watching docker API for new containers
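Why Containers() now returns a copy: the watch() goroutine mutates w.containers concurrently, and in Go, ranging over a map while another goroutine writes to it is a fatal data race; the RLock alone would only cover the duration of the call, not the caller's iteration. A hypothetical caller that is safe under the new code:

// The snapshot is private to the caller, so watch() can keep
// updating the internal map while we iterate over it.
snapshot := w.Containers()
for id, container := range snapshot {
	fmt.Printf("%s: name=%s image=%s\n", id, container.Name, container.Image)
}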
@@ -99,6 +137,8 @@ func (w *watcher) Start() error {
 	logp.Debug("docker", "Start docker containers scanner")
 	w.lastValidTimestamp = time.Now().Unix()
 
+	w.Lock()
+	defer w.Unlock()
 	containers, err := w.client.ContainerList(w.ctx, types.ContainerListOptions{})
 	if err != nil {
 		return err
@@ -113,11 +153,17 @@ func (w *watcher) Start() error {
 		}
 	}
 
+	w.stopped.Add(2)
 	go w.watch()
+	go w.cleanupWorker()
 
 	return nil
 }
 
+func (w *watcher) Stop() {
+	w.stop()
+}
+
 func (w *watcher) watch() {
 	filters := filters.NewArgs()
 	filters.Add("type", "container")
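For context, the lifecycle after this change: Start() lists the running containers under the write lock and launches the two goroutines tracked by the stopped WaitGroup, and the new Stop() cancels the shared context so both goroutines exit. A hedged usage sketch (host and tls are assumed inputs, error handling elided):

w, err := NewWatcher(host, tls)
if err != nil {
	return err
}
if err := w.Start(); err != nil {
	return err
}
// Stop cancels w.ctx; watch() and cleanupWorker() see ctx.Done(),
// call stopped.Done() and return.
defer w.Stop()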
@@ -138,22 +184,30 @@ func (w *watcher) watch() {
 			w.lastValidTimestamp = event.Time
 
 			// Add / update
-			if event.Action == "create" || event.Action == "update" {
+			if event.Action == "start" || event.Action == "update" {
 				name := event.Actor.Attributes["name"]
 				image := event.Actor.Attributes["image"]
 				delete(event.Actor.Attributes, "name")
 				delete(event.Actor.Attributes, "image")
 
+				w.Lock()
 				w.containers[event.Actor.ID] = &Container{
 					ID:     event.Actor.ID,
 					Name:   name,
 					Image:  image,
 					Labels: event.Actor.Attributes,
 				}
+
+				// un-delete if it's flagged (in case of update or recreation)
+				delete(w.deleted, event.Actor.ID)
+				w.Unlock()
 			}
 
 			// Delete
 			if event.Action == "die" || event.Action == "kill" {
-				delete(w.containers, event.Actor.ID)
+				w.Lock()
+				w.deleted[event.Actor.ID] = time.Now()
+				w.Unlock()
 			}
 
 		case err := <-errors:

Review discussion on this hunk:

- If a container dies and gets started again it will still time out eventually and lose the metadata.
- I think that case is treated, as the container gets un-deleted when it is created again.
- Sorry, you are right, I was talking about the Kubernetes watcher, not Docker; will apply the same fix, thanks! I'm also working on tests.
- Pushed b54187e.
- When I restart a container, the events are: kill -> die -> stop -> start -> restart. So the container will not be un-deleted in this case. Looking at it, adding the metadata on "create" and removing on "die" does seem inconsistent to me.
- Let me do some tests; looks like it would be better to add it on start, isn't it?
- I think adding on start is the better approach.
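The restart sequence quoted above is what motivated moving the add trigger from "create" to "start". A sketch of how a test could replay it through the Client interface, assuming a mockClient instance (mock) like the one sketched earlier:

// A docker restart emits: kill -> die -> stop -> start -> restart.
// "kill"/"die" now only flag the container in w.deleted, and "start"
// re-adds it and clears the flag, so metadata survives the restart.
for _, action := range []string{"kill", "die", "stop", "start", "restart"} {
	mock.events <- events.Message{
		Action: action,
		Actor: events.Actor{
			ID:         "cid",
			Attributes: map[string]string{"name": "test", "image": "busybox"},
		},
	}
}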
@@ -164,8 +218,43 @@ func (w *watcher) watch() {
 
 		case <-w.ctx.Done():
 			logp.Debug("docker", "Watcher stopped")
+			w.stopped.Done()
 			return
 		}
 	}
 }
 
+// Clean up deleted containers after they are not used anymore
+func (w *watcher) cleanupWorker() {
+	for {
+		// Wait a full period
+		time.Sleep(w.cleanupTimeout)
+
+		select {
+		case <-w.ctx.Done():
+			w.stopped.Done()
+			return
+		default:
+			// Check entries for timeout
+			var toDelete []string
+			timeout := time.Now().Add(-w.cleanupTimeout)
+			w.RLock()
+			for key, lastSeen := range w.deleted {
+				if lastSeen.Before(timeout) {
+					logp.Debug("docker", "Removing container %s after cool down timeout", key)
+					toDelete = append(toDelete, key)
+				}
+			}
+			w.RUnlock()
+
+			// Delete timed out entries:
+			w.Lock()
+			for _, key := range toDelete {
+				delete(w.deleted, key)
+				delete(w.containers, key)
+			}
+			w.Unlock()
+		}
+	}
+}

Review discussion on this hunk:

- Should we add some debug logging to this? Otherwise it is very hard to figure out what is still in memory and what was removed.
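A hedged sketch of exercising the cool-down end to end with NewWatcherWithClient and a short timeout; the stub and timings are illustrative, and a real test would synchronize with the watcher instead of sleeping:

mock := &mockClient{events: make(chan events.Message), errors: make(chan error)}
w, _ := NewWatcherWithClient(mock, 50*time.Millisecond)
_ = w.Start()

// Start a container, then kill it: its metadata is flagged, not removed.
mock.events <- events.Message{Action: "start", Actor: events.Actor{
	ID: "cid", Attributes: map[string]string{"name": "test", "image": "busybox"}}}
mock.events <- events.Message{Action: "die", Actor: events.Actor{
	ID: "cid", Attributes: map[string]string{}}}

time.Sleep(10 * time.Millisecond)      // let watch() process both events
fmt.Println(w.Container("cid") != nil) // true: still resolvable after "die"

time.Sleep(150 * time.Millisecond)     // several cleanup periods without access
fmt.Println(w.Container("cid") == nil) // true: removed after the cool down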
Review discussion:

- I'm aware the code was here before, but should we check here with "ok" if the ID actually exists?
- We get nil if the container is not there, and we treat that case.