Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log the open file count once a minute to aid debugging. #506

Merged
merged 6 commits into from
Jul 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions internal/atomiccounter/atomiccounter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package atomiccounter
adam-mateen marked this conversation as resolved.
Show resolved Hide resolved

import (
"sync/atomic"
)

// AtomicCounter is an int64 counter that is safe to increment, decrement,
// and read from multiple goroutines concurrently. The zero value is a
// counter at 0.
//
// All methods have pointer receivers; a copy of an AtomicCounter is an
// independent counter, so share one counter by taking its address.
type AtomicCounter struct {
	// val must only be accessed through the sync/atomic functions.
	// It is the first (and only) field so that it stays 64-bit aligned,
	// which sync/atomic requires for 64-bit operations on 32-bit platforms.
	val int64
}

// NewAtomicCounter returns a new counter with the default value of 0.
func NewAtomicCounter() AtomicCounter {
	return AtomicCounter{}
}

// Increment atomically adds 1 to the counter.
func (ac *AtomicCounter) Increment() {
	atomic.AddInt64(&ac.val, 1)
}

// Decrement atomically subtracts 1 from the counter.
func (ac *AtomicCounter) Decrement() {
	atomic.AddInt64(&ac.val, -1)
}

// Get returns the current value of the counter.
//
// The value is read with an atomic load so that it does not race with
// concurrent Increment/Decrement calls. The result is only a snapshot: it
// may be stale by the time the caller uses it, so Get is not suitable for
// synchronizing work between goroutines. It is just for logging the
// current value.
func (ac *AtomicCounter) Get() int64 {
	return atomic.LoadInt64(&ac.val)
}
2 changes: 2 additions & 0 deletions logs/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"log"
"time"

"github.com/aws/amazon-cloudwatch-agent/plugins/inputs/logfile/tail"
"github.com/influxdata/telegraf/config"
)

Expand Down Expand Up @@ -95,6 +96,7 @@ func (l *LogAgent) Run(ctx context.Context) {
for {
select {
case <-t.C:
log.Printf("D! [logagent] open file count, %v", tail.OpenFileCount.Get())
for _, c := range l.collections {
srcs := c.FindLogSrc()
for _, src := range srcs {
Expand Down
5 changes: 5 additions & 0 deletions plugins/inputs/logfile/tail/tail.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"sync"
"time"

"github.com/aws/amazon-cloudwatch-agent/internal/atomiccounter"
"github.com/aws/amazon-cloudwatch-agent/plugins/inputs/logfile/tail/watch"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/models"
Expand All @@ -23,6 +24,7 @@ var (
ErrDeletedNotReOpen = errors.New("File was deleted, tail should now stop")
exitOnDeletionCheckDuration = time.Minute
exitOnDeletionWaitDuration = 5 * time.Minute
OpenFileCount = atomiccounter.NewAtomicCounter()
)

type Line struct {
Expand Down Expand Up @@ -120,6 +122,7 @@ func TailFile(filename string, config Config) (*Tail, error) {
if err != nil {
return nil, err
}
OpenFileCount.Increment()
}

if !config.ReOpen {
Expand Down Expand Up @@ -181,6 +184,7 @@ func (tail *Tail) closeFile() {
if tail.file != nil {
tail.file.Close()
tail.file = nil
OpenFileCount.Decrement()
}
}

Expand All @@ -205,6 +209,7 @@ func (tail *Tail) reopen() error {
}
break
}
OpenFileCount.Increment()
return nil
}

Expand Down
6 changes: 5 additions & 1 deletion plugins/inputs/logfile/tail/tail_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ func setup(t *testing.T) (*os.File, *Tail, *testLogger) {
if err != nil {
t.Fatalf("failed to tail file %v: %v", tmpfile.Name(), err)
}

// Cannot expect OpenFileCount.Get() to be 1 here: the Tail returned by TailFile
// was not created with MustExist=true, so the file may not have been opened yet.
return tmpfile, tail, &tl
}

Expand All @@ -163,6 +164,8 @@ func readThreelines(t *testing.T, tail *Tail) {
t.Errorf("wrong line from tail found: '%v'", line.Text)
}
}
// If the file was readable, then expect it to be counted as open.
assert.Equal(t, int64(1), OpenFileCount.Get())
}

func verifyTailerLogging(t *testing.T, tlog *testLogger, expectedErrorMsg string) {
Expand All @@ -179,6 +182,7 @@ func verifyTailerLogging(t *testing.T, tlog *testLogger, expectedErrorMsg string
func verifyTailerExited(t *testing.T, tail *Tail) {
select {
case <-tail.Dead():
assert.Equal(t, int64(0), OpenFileCount.Get())
return
default:
t.Errorf("Tailer is still alive after file removed and wait period")
Expand Down
10 changes: 7 additions & 3 deletions plugins/inputs/logfile/tailersrc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func TestTailerSrc(t *testing.T) {
if err != nil {
t.Errorf("Failed to create temp file: %v", err)
}

beforeCount := tail.OpenFileCount.Get()
tailer, err := tail.TailFile(file.Name(),
tail.Config{
ReOpen: false,
Expand All @@ -63,7 +63,7 @@ func TestTailerSrc(t *testing.T) {
t.Errorf("Failed to create tailer src for file %v with error: %v", file, err)
return
}

assert.Equal(t, beforeCount + 1, tail.OpenFileCount.Get())
ts := NewTailerSrc(
"groupName", "streamName",
"destination",
Expand Down Expand Up @@ -144,11 +144,15 @@ func TestTailerSrc(t *testing.T) {
fmt.Fprintln(file, l)
}

// Removal of log file should stop tailersrc
// Removal of log file should stop tailerSrc and Tail.
if err := os.Remove(file.Name()); err != nil {
t.Errorf("failed to remove log file '%v': %v", file.Name(), err)
}
<-done
// Most test functions do not wait for the Tail to close the file.
// They rely on Tail to detect file deletion and close the file.
// So the count might be nonzero due to previous test cases.
assert.LessOrEqual(t, tail.OpenFileCount.Get(), beforeCount)
}

func TestOffsetDoneCallBack(t *testing.T) {
Expand Down
4 changes: 0 additions & 4 deletions profiler/profiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ func (p *profiler) AddStats(key []string, value float64) {
p.Lock()
defer p.Unlock()
k := strings.Join(key, "_")

if _, ok := p.stats[k]; !ok {
p.stats[k] = 0
}
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
p.stats[k] += value
}

Expand Down