From e9ccebedb4adda7035ab43ff8d2e9a2002001caa Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 21 May 2024 16:06:34 -0400 Subject: [PATCH 01/30] working on S3 ack tracking --- .../input/awss3/input_benchmark_test.go | 2 +- x-pack/filebeat/input/awss3/interfaces.go | 15 +--- x-pack/filebeat/input/awss3/s3_input.go | 67 ++++++++------- x-pack/filebeat/input/awss3/s3_objects.go | 82 +++++++++---------- .../filebeat/input/awss3/s3_objects_test.go | 28 +++---- x-pack/filebeat/input/awss3/s3_test.go | 4 +- x-pack/filebeat/input/awss3/sqs_input.go | 2 +- x-pack/filebeat/input/awss3/sqs_s3_event.go | 4 +- 8 files changed, 99 insertions(+), 105 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index 09b7c8bd9d2..b1061923c1d 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -338,7 +338,7 @@ func benchmarkInputS3(t *testing.T, numberOfWorkers int) testing.BenchmarkResult states, err := newStates(nil, store) assert.NoError(t, err, "states creation should succeed") - s3EventHandlerFactory := newS3ObjectProcessorFactory(log.Named("s3"), metrics, s3API, config.FileSelectors, backupConfig{}) + s3EventHandlerFactory := newS3ObjectProcessorFactory(metrics, s3API, config.FileSelectors, backupConfig{}) s3Poller := &s3PollerInput{ log: logp.NewLogger(inputName), config: config, diff --git a/x-pack/filebeat/input/awss3/interfaces.go b/x-pack/filebeat/input/awss3/interfaces.go index 1f1390c4f2f..4f5440cd91f 100644 --- a/x-pack/filebeat/input/awss3/interfaces.go +++ b/x-pack/filebeat/input/awss3/interfaces.go @@ -100,25 +100,18 @@ type s3ObjectHandlerFactory interface { // Create returns a new s3ObjectHandler that can be used to process the // specified S3 object. If the handler is not configured to process the // given S3 object (based on key name) then it will return nil. - Create(ctx context.Context, log *logp.Logger, client beat.Client, acker *awscommon.EventACKTracker, obj s3EventV2) s3ObjectHandler + Create(ctx context.Context, acker *awscommon.EventACKTracker, obj s3EventV2) s3ObjectHandler } type s3ObjectHandler interface { // ProcessS3Object downloads the S3 object, parses it, creates events, and - // publishes them. It returns when processing finishes or when it encounters - // an unrecoverable error. It does not wait for the events to be ACKed by - // the publisher before returning (use eventACKTracker's Wait() method to - // determine this). - ProcessS3Object() error + // sends them to the given channel. It returns when processing finishes or + // when it encounters an unrecoverable error. + ProcessS3Object(log *logp.Logger, eventChan chan<- *beat.Event) error // FinalizeS3Object finalizes processing of an S3 object after the current // batch is finished. FinalizeS3Object() error - - // Wait waits for every event published by ProcessS3Object() to be ACKed - // by the publisher before returning. 
Internally it uses the - // s3ObjectHandler eventACKTracker's Wait() method - Wait() } // ------ diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index 999b27da534..5cf1d55fb7f 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -28,13 +28,13 @@ var readerLoopMaxCircuitBreaker = 10 type s3PollerInput struct { log *logp.Logger + pipeline beat.Pipeline config config awsConfig awssdk.Config store beater.StateStore provider string s3 s3API metrics *inputMetrics - client beat.Client s3ObjectHandler s3ObjectHandlerFactory states *states } @@ -69,6 +69,7 @@ func (in *s3PollerInput) Run( pipeline beat.Pipeline, ) error { in.log = inputContext.Logger.Named("s3") + in.pipeline = pipeline var err error // Load the persistent S3 polling state. @@ -78,13 +79,6 @@ func (in *s3PollerInput) Run( } defer in.states.Close() - // Create client for publishing events and receive notification of their ACKs. - in.client, err = createPipelineClient(pipeline) - if err != nil { - return fmt.Errorf("failed to create pipeline client: %w", err) - } - defer in.client.Close() - ctx := v2.GoContextFromCanceler(inputContext.Cancelation) in.s3, err = createS3API(ctx, in.config, in.awsConfig) if err != nil { @@ -95,7 +89,6 @@ func (in *s3PollerInput) Run( defer in.metrics.Close() in.s3ObjectHandler = newS3ObjectProcessorFactory( - in.log, in.metrics, in.s3, in.config.getFileSelectors(), @@ -117,7 +110,7 @@ func (in *s3PollerInput) run(ctx context.Context) { func (in *s3PollerInput) runPoll(ctx context.Context) { var workerWg sync.WaitGroup - workChan := make(chan *s3FetchTask) + workChan := make(chan state) // Start the worker goroutines to listen on the work channel for i := 0; i < in.config.NumberOfWorkers; i++ { @@ -133,15 +126,34 @@ func (in *s3PollerInput) runPoll(ctx context.Context) { workerWg.Wait() } -func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan *s3FetchTask) { +func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) { + // Create client for publishing events and receive notification of their ACKs. + client, err := createPipelineClient(in.pipeline) + if err != nil { + in.log.Errorf("failed to create pipeline client: %v", err.Error()) + return + } + defer client.Close() + rateLimitWaiter := backoff.NewEqualJitterBackoff(ctx.Done(), 1, 120) - for s3ObjectPayload := range workChan { - objHandler := s3ObjectPayload.s3ObjectHandler - state := s3ObjectPayload.objectState + for state := range workChan { + event := in.s3EventForState(state) + + acker := awscommon.NewEventACKTracker(ctx) + objHandler := in.s3ObjectHandler.Create(ctx, acker, event) + if objHandler == nil { + in.log.Debugw("empty s3 processor (no matching reader configs).", "state", state) + continue + } + + eventChan := make(chan *beat.Event) + go func() { + //for + }() // Process S3 object (download, parse, create events). - err := objHandler.ProcessS3Object() + err := objHandler.ProcessS3Object(in.log, eventChan) if errors.Is(err, errS3DownloadFailed) { // Download errors are ephemeral. Add a backoff delay, then skip to the // next iteration so we don't mark the object as permanently failed. @@ -152,7 +164,8 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan *s3Fetc rateLimitWaiter.Reset() // Wait for downloaded objects to be ACKed. 
- objHandler.Wait() + acker.Wait() + //objHandler.Wait() if err != nil { in.log.Errorf("failed processing S3 event for object key %q in bucket %q: %v", @@ -175,7 +188,7 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan *s3Fetc } } -func (in *s3PollerInput) readerLoop(ctx context.Context, workChan chan<- *s3FetchTask) { +func (in *s3PollerInput) readerLoop(ctx context.Context, workChan chan<- state) { defer close(workChan) bucketName := getBucketNameFromARN(in.config.getBucketARN()) @@ -216,31 +229,27 @@ func (in *s3PollerInput) readerLoop(ctx context.Context, workChan chan<- *s3Fetc continue } - s3Processor := in.createS3ObjectProcessor(ctx, state) - if s3Processor == nil { - in.log.Debugw("empty s3 processor.", "state", state) - continue - } - - workChan <- &s3FetchTask{ - s3ObjectHandler: s3Processor, - objectState: state, - } + workChan <- state in.metrics.s3ObjectsProcessedTotal.Inc() } } } -func (in *s3PollerInput) createS3ObjectProcessor(ctx context.Context, state state) s3ObjectHandler { +func (in *s3PollerInput) s3EventForState(state state) s3EventV2 { event := s3EventV2{} event.AWSRegion = in.awsConfig.Region event.Provider = in.provider event.S3.Bucket.Name = state.Bucket event.S3.Bucket.ARN = in.config.getBucketARN() event.S3.Object.Key = state.Key + return event +} + +func (in *s3PollerInput) createS3ObjectProcessor(ctx context.Context, state state) s3ObjectHandler { + event := in.s3EventForState(state) acker := awscommon.NewEventACKTracker(ctx) - return in.s3ObjectHandler.Create(ctx, in.log, in.client, acker, event) + return in.s3ObjectHandler.Create(ctx, acker, event) } diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 05ee572343f..3d45a53c177 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -30,25 +30,39 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) -const ( - contentTypeJSON = "application/json" - contentTypeNDJSON = "application/x-ndjson" -) - type s3ObjectProcessorFactory struct { - log *logp.Logger metrics *inputMetrics s3 s3API fileSelectors []fileSelectorConfig backupConfig backupConfig } +type s3ObjectProcessor struct { + *s3ObjectProcessorFactory + + ctx context.Context + // The processor sends decoded events from its object to this channel + eventChan chan *beat.Event + acker *awscommon.EventACKTracker // ACKer tied to the SQS message (multiple S3 readers share an ACKer when the S3 notification event contains more than one S3 object). + readerConfig *readerConfig // Config about how to process the object. + s3Obj s3EventV2 // S3 object information. + s3ObjHash string + s3RequestURL string + + s3Metadata map[string]interface{} // S3 object metadata. +} + +const ( + contentTypeJSON = "application/json" + contentTypeNDJSON = "application/x-ndjson" +) + // errS3DownloadFailed reports problems downloading an S3 object. Download errors // should never treated as permanent, they are just an indication to apply a // retry backoff until the connection is healthy again. var errS3DownloadFailed = errors.New("S3 download failure") -func newS3ObjectProcessorFactory(log *logp.Logger, metrics *inputMetrics, s3 s3API, sel []fileSelectorConfig, backupConfig backupConfig) *s3ObjectProcessorFactory { +func newS3ObjectProcessorFactory(metrics *inputMetrics, s3 s3API, sel []fileSelectorConfig, backupConfig backupConfig) *s3ObjectProcessorFactory { if metrics == nil { // Metrics are optional. Initialize a stub. 
metrics = newInputMetrics("", nil, 0) @@ -59,7 +73,6 @@ func newS3ObjectProcessorFactory(log *logp.Logger, metrics *inputMetrics, s3 s3A } } return &s3ObjectProcessorFactory{ - log: log, metrics: metrics, s3: s3, fileSelectors: sel, @@ -78,22 +91,16 @@ func (f *s3ObjectProcessorFactory) findReaderConfig(key string) *readerConfig { // Create returns a new s3ObjectProcessor. It returns nil when no file selectors // match the S3 object key. -func (f *s3ObjectProcessorFactory) Create(ctx context.Context, log *logp.Logger, client beat.Client, ack *awscommon.EventACKTracker, obj s3EventV2) s3ObjectHandler { - log = log.With( - "bucket_arn", obj.S3.Bucket.Name, - "object_key", obj.S3.Object.Key) - +func (f *s3ObjectProcessorFactory) Create(ctx context.Context, ack *awscommon.EventACKTracker, obj s3EventV2) s3ObjectHandler { readerConfig := f.findReaderConfig(obj.S3.Object.Key) if readerConfig == nil { - log.Debug("Skipping S3 object processing. No file_selectors are a match.") + // No file_selectors are a match, skip. return nil } return &s3ObjectProcessor{ s3ObjectProcessorFactory: f, - log: log, ctx: ctx, - publisher: client, acker: ack, readerConfig: readerConfig, s3Obj: obj, @@ -101,32 +108,24 @@ func (f *s3ObjectProcessorFactory) Create(ctx context.Context, log *logp.Logger, } } -type s3ObjectProcessor struct { - *s3ObjectProcessorFactory - - log *logp.Logger - ctx context.Context - publisher beat.Client - acker *awscommon.EventACKTracker // ACKer tied to the SQS message (multiple S3 readers share an ACKer when the S3 notification event contains more than one S3 object). - readerConfig *readerConfig // Config about how to process the object. - s3Obj s3EventV2 // S3 object information. - s3ObjHash string - s3RequestURL string - - s3Metadata map[string]interface{} // S3 object metadata. -} - func (p *s3ObjectProcessor) Wait() { p.acker.Wait() } -func (p *s3ObjectProcessor) ProcessS3Object() error { +func (p *s3ObjectProcessor) Done() <-chan struct{} { + return nil +} + +func (p *s3ObjectProcessor) ProcessS3Object(log *logp.Logger, eventChan chan<- *beat.Event) error { if p == nil { return nil } + log = log.With( + "bucket_arn", p.s3Obj.S3.Bucket.Name, + "object_key", p.s3Obj.S3.Object.Key) // Metrics and Logging - p.log.Debug("Begin S3 object processing.") + log.Debug("Begin S3 object processing.") p.metrics.s3ObjectsRequestedTotal.Inc() p.metrics.s3ObjectsInflight.Inc() start := time.Now() @@ -134,7 +133,7 @@ func (p *s3ObjectProcessor) ProcessS3Object() error { elapsed := time.Since(start) p.metrics.s3ObjectsInflight.Dec() p.metrics.s3ObjectProcessingTime.Update(elapsed.Nanoseconds()) - p.log.Debugw("End S3 object processing.", "elapsed_time_ns", elapsed) + log.Debugw("End S3 object processing.", "elapsed_time_ns", elapsed) }() // Request object (download). 
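
A note on the channel handoff this commit introduces: ProcessS3Object no longer publishes events itself, it sends them on a caller-supplied channel, but the consuming side in workerLoop is still a stub (the goroutine whose body is just the //for placeholder). The sketch below shows the kind of drain loop that consumer would need. It is illustrative only: the types are stand-ins for *beat.Event and beat.Client so it compiles on its own, it assumes the producer closes the channel when it finishes, and later commits in this series replace the channel with a callback.

package main

import "fmt"

// event and publisher are stand-ins for *beat.Event and beat.Client so the
// sketch is self-contained.
type event struct{ message string }

type publisher interface{ Publish(event) }

type printPublisher struct{}

func (printPublisher) Publish(e event) { fmt.Println("published:", e.message) }

// drainAndPublish publishes everything the object processor emits and counts
// the events, so the caller knows how many ACKs to expect for this object.
func drainAndPublish(eventChan <-chan *event, client publisher) int {
	published := 0
	for evt := range eventChan {
		client.Publish(*evt)
		published++
	}
	return published
}

func main() {
	eventChan := make(chan *event)
	done := make(chan int)
	go func() { done <- drainAndPublish(eventChan, printPublisher{}) }()

	// Producer side, standing in for ProcessS3Object sending decoded events.
	eventChan <- &event{message: "line 1"}
	eventChan <- &event{message: "line 2"}
	close(eventChan)
	fmt.Println("events published:", <-done)
}
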
@@ -256,7 +255,7 @@ func (p *s3ObjectProcessor) readJSON(r io.Reader) error { data, _ := item.MarshalJSON() evt := p.createEvent(string(data), offset) - p.publish(p.acker, &evt) + p.eventChan <- &evt } return nil @@ -291,7 +290,7 @@ func (p *s3ObjectProcessor) readJSONSlice(r io.Reader, evtOffset int64) (int64, data, _ := item.MarshalJSON() evt := p.createEvent(string(data), evtOffset) - p.publish(p.acker, &evt) + p.eventChan <- &evt evtOffset++ } @@ -336,7 +335,7 @@ func (p *s3ObjectProcessor) splitEventList(key string, raw json.RawMessage, offs data, _ := item.MarshalJSON() p.s3ObjHash = objHash evt := p.createEvent(string(data), offset+arrayOffset) - p.publish(p.acker, &evt) + p.eventChan <- &evt } return nil @@ -376,7 +375,7 @@ func (p *s3ObjectProcessor) readFile(r io.Reader) error { event := p.createEvent(string(message.Content), offset) event.Fields.DeepUpdate(message.Fields) offset += int64(message.Bytes) - p.publish(p.acker, &event) + p.eventChan <- &event } if errors.Is(err, io.EOF) { @@ -391,13 +390,6 @@ func (p *s3ObjectProcessor) readFile(r io.Reader) error { return nil } -func (p *s3ObjectProcessor) publish(ack *awscommon.EventACKTracker, event *beat.Event) { - ack.Add() - event.Private = ack - p.metrics.s3EventsCreatedTotal.Inc() - p.publisher.Publish(*event) -} - func (p *s3ObjectProcessor) createEvent(message string, offset int64) beat.Event { event := beat.Event{ Timestamp: time.Now().UTC(), diff --git a/x-pack/filebeat/input/awss3/s3_objects_test.go b/x-pack/filebeat/input/awss3/s3_objects_test.go index df50726823f..d7db5d027ee 100644 --- a/x-pack/filebeat/input/awss3/s3_objects_test.go +++ b/x-pack/filebeat/input/awss3/s3_objects_test.go @@ -156,9 +156,9 @@ func TestS3ObjectProcessor(t *testing.T) { GetObject(gomock.Any(), gomock.Eq(s3Event.S3.Bucket.Name), gomock.Eq(s3Event.S3.Object.Key)). Return(nil, errFakeConnectivityFailure) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, nil, backupConfig{}) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).ProcessS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) require.Error(t, err) assert.True(t, errors.Is(err, errS3DownloadFailed), "expected errS3DownloadFailed") }) @@ -178,9 +178,9 @@ func TestS3ObjectProcessor(t *testing.T) { GetObject(gomock.Any(), gomock.Eq(s3Event.S3.Bucket.Name), gomock.Eq(s3Event.S3.Object.Key)). 
Return(nil, nil) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, nil, backupConfig{}) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).ProcessS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) require.Error(t, err) }) @@ -205,9 +205,9 @@ func TestS3ObjectProcessor(t *testing.T) { Times(2), ) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, nil, backupConfig{}) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).ProcessS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) require.NoError(t, err) }) @@ -231,9 +231,9 @@ func TestS3ObjectProcessor(t *testing.T) { Return(nil, nil), ) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, nil, backupCfg) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupCfg) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).FinalizeS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).FinalizeS3Object() require.NoError(t, err) }) @@ -261,9 +261,9 @@ func TestS3ObjectProcessor(t *testing.T) { Return(nil, nil), ) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, nil, backupCfg) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupCfg) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).FinalizeS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).FinalizeS3Object() require.NoError(t, err) }) @@ -288,9 +288,9 @@ func TestS3ObjectProcessor(t *testing.T) { Return(nil, nil), ) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, nil, backupCfg) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupCfg) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).FinalizeS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).FinalizeS3Object() require.NoError(t, err) }) @@ -334,9 +334,9 @@ func _testProcessS3Object(t testing.TB, file, contentType string, numEvents int, Times(numEvents), ) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3API, selectors, backupConfig{}) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, selectors, backupConfig{}) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).ProcessS3Object() + err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) if !expectErr { require.NoError(t, err) diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go index 216d9866e73..05b699b6ccc 100644 --- a/x-pack/filebeat/input/awss3/s3_test.go +++ b/x-pack/filebeat/input/awss3/s3_test.go @@ -126,7 +126,7 @@ func TestS3Poller(t *testing.T) { GetObject(gomock.Any(), gomock.Eq(bucket), gomock.Eq("2024-02-08T08:35:00+00:02.json.gz")). 
Return(nil, errFakeConnectivityFailure) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockAPI, nil, backupConfig{}) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockAPI, nil, backupConfig{}) states, err := newStates(nil, store) require.NoError(t, err, "states creation must succeed") poller := &s3PollerInput{ @@ -264,7 +264,7 @@ func TestS3Poller(t *testing.T) { GetObject(gomock.Any(), gomock.Eq(bucket), gomock.Eq("key5")). Return(nil, errFakeConnectivityFailure) - s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockS3, nil, backupConfig{}) + s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3, nil, backupConfig{}) states, err := newStates(nil, store) require.NoError(t, err, "states creation must succeed") poller := &s3PollerInput{ diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index e524cf9fd1c..7faa52c2a92 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -221,7 +221,7 @@ func (in *sqsReaderInput) logConfigSummary() { func (in *sqsReaderInput) createEventProcessor(pipeline beat.Pipeline) (sqsProcessor, error) { fileSelectors := in.config.getFileSelectors() - s3EventHandlerFactory := newS3ObjectProcessorFactory(in.log.Named("s3"), in.metrics, in.s3, fileSelectors, in.config.BackupConfig) + s3EventHandlerFactory := newS3ObjectProcessorFactory(in.metrics, in.s3, fileSelectors, in.config.BackupConfig) script, err := newScriptFromConfig(in.log.Named("sqs_script"), in.config.SQSScript) if err != nil { diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index db893e443ac..3fe0dcf2867 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -327,13 +327,13 @@ func (p *sqsS3EventProcessor) processS3Events(ctx context.Context, log *logp.Log var errs []error var handles []s3ObjectHandler for i, event := range s3Events { - s3Processor := p.s3ObjectHandler.Create(ctx, log, client, acker, event) + s3Processor := p.s3ObjectHandler.Create(ctx, acker, event) if s3Processor == nil { continue } // Process S3 object (download, parse, create events). - if err := s3Processor.ProcessS3Object(); err != nil { + if err := s3Processor.ProcessS3Object(log, client); err != nil { errs = append(errs, fmt.Errorf( "failed processing S3 event for object key %q in bucket %q (object record %d of %d in SQS notification): %w", event.S3.Object.Key, event.S3.Bucket.Name, i+1, len(s3Events), err)) From b17f7b54965b5178e3164806a42d28b22e587e54 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 22 May 2024 11:57:17 -0400 Subject: [PATCH 02/30] handle S3 acks asynchronously --- libbeat/common/fifo/fifo.go | 68 +++++++++++++++++++++ x-pack/filebeat/input/awss3/acks.go | 65 ++++++++++++++++++++ x-pack/filebeat/input/awss3/interfaces.go | 5 +- x-pack/filebeat/input/awss3/s3.go | 5 +- x-pack/filebeat/input/awss3/s3_input.go | 43 ++++++------- x-pack/filebeat/input/awss3/s3_objects.go | 37 ++++------- x-pack/filebeat/input/awss3/sqs_s3_event.go | 7 ++- 7 files changed, 174 insertions(+), 56 deletions(-) create mode 100644 libbeat/common/fifo/fifo.go create mode 100644 x-pack/filebeat/input/awss3/acks.go diff --git a/libbeat/common/fifo/fifo.go b/libbeat/common/fifo/fifo.go new file mode 100644 index 00000000000..ec5779a7ff4 --- /dev/null +++ b/libbeat/common/fifo/fifo.go @@ -0,0 +1,68 @@ +// Licensed to Elasticsearch B.V. 
under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package fifo + +import "errors" + +var errFIFOEmpty = errors.New("tried to read from an empty FIFO queue") + +// FIFO is a minimal first-in first-out queue based on a singly linked list. +type FIFO[T any] struct { + first *node[T] + last *node[T] +} + +type node[T any] struct { + next *node[T] + value T +} + +func (f *FIFO[T]) Add(value T) { + newNode := &node[T]{value: value} + if f.first == nil { + f.first = newNode + } else { + f.last.next = newNode + } + f.last = newNode +} + +func (f *FIFO[T]) Empty() bool { + return f.first == nil +} + +// Return the first value (if present) without removing it from the queue. +// If the queue is empty a default value is returned, to detect this case +// use f.Empty(). +func (f *FIFO[T]) First() T { + if f.first == nil { + var none T + return none + } + return f.first.value +} + +// Remove the first entry in the queue. Does nothing if the FIFO is empty. +func (f *FIFO[T]) Remove() { + if f.first != nil { + f.first = f.first.next + if f.first == nil { + f.last = nil + } + } +} diff --git a/x-pack/filebeat/input/awss3/acks.go b/x-pack/filebeat/input/awss3/acks.go new file mode 100644 index 00000000000..9afb23fc72d --- /dev/null +++ b/x-pack/filebeat/input/awss3/acks.go @@ -0,0 +1,65 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package awss3 + +import ( + "sync" + + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common/acker" + "github.com/elastic/beats/v7/libbeat/common/fifo" +) + +type s3ACKHandler struct { + sync.Mutex + pendingACKs fifo.FIFO[publishedS3Object] + ackedCount int +} + +type publishedS3Object struct { + publishCount int + ackCallback func() +} + +func (ah *s3ACKHandler) Add(publishCount int, ackCallback func()) { + ah.Lock() + defer ah.Unlock() + ah.pendingACKs.Add(publishedS3Object{ + publishCount: publishCount, + ackCallback: ackCallback, + }) +} + +func (ah *s3ACKHandler) ACK(count int) { + ah.Lock() + ah.ackedCount += count + callbacks := ah.advance() + ah.Unlock() + for _, c := range callbacks { + c() + } +} + +// Advance the acks list based on the current ackedCount, invoking any +// acknowledgment callbacks for completed objects. 
+func (ah *s3ACKHandler) advance() []func() { + var callbacks []func() + for !ah.pendingACKs.Empty() { + nextObj := ah.pendingACKs.First() + if nextObj.publishCount > ah.ackedCount { + // This object hasn't been fully acknowledged yet + break + } + callbacks = append(callbacks, nextObj.ackCallback) + ah.pendingACKs.Remove() + } + return callbacks +} + +func (ah *s3ACKHandler) pipelineEventListener() beat.EventListener { + return acker.TrackingCounter(func(_ int, total int) { + ah.ACK(total) + }) +} diff --git a/x-pack/filebeat/input/awss3/interfaces.go b/x-pack/filebeat/input/awss3/interfaces.go index 4f5440cd91f..489adb54b2a 100644 --- a/x-pack/filebeat/input/awss3/interfaces.go +++ b/x-pack/filebeat/input/awss3/interfaces.go @@ -16,7 +16,6 @@ import ( "github.com/aws/smithy-go/middleware" "github.com/elastic/beats/v7/libbeat/beat" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" @@ -100,14 +99,14 @@ type s3ObjectHandlerFactory interface { // Create returns a new s3ObjectHandler that can be used to process the // specified S3 object. If the handler is not configured to process the // given S3 object (based on key name) then it will return nil. - Create(ctx context.Context, acker *awscommon.EventACKTracker, obj s3EventV2) s3ObjectHandler + Create(ctx context.Context, obj s3EventV2) s3ObjectHandler } type s3ObjectHandler interface { // ProcessS3Object downloads the S3 object, parses it, creates events, and // sends them to the given channel. It returns when processing finishes or // when it encounters an unrecoverable error. - ProcessS3Object(log *logp.Logger, eventChan chan<- *beat.Event) error + ProcessS3Object(log *logp.Logger, eventCallback func(e beat.Event)) error // FinalizeS3Object finalizes processing of an S3 object after the current // batch is finished. diff --git a/x-pack/filebeat/input/awss3/s3.go b/x-pack/filebeat/input/awss3/s3.go index eb8e19c2cf9..f790baee62c 100644 --- a/x-pack/filebeat/input/awss3/s3.go +++ b/x-pack/filebeat/input/awss3/s3.go @@ -14,7 +14,6 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/elastic/beats/v7/libbeat/beat" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" ) func createS3API(ctx context.Context, config config, awsConfig awssdk.Config) (*awsS3API, error) { @@ -34,9 +33,9 @@ func createS3API(ctx context.Context, config config, awsConfig awssdk.Config) (* }, nil } -func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) { +func createPipelineClient(pipeline beat.Pipeline, acks *s3ACKHandler) (beat.Client, error) { return pipeline.ConnectWith(beat.ClientConfig{ - EventListener: awscommon.NewEventACKHandler(), + EventListener: acks.pipelineEventListener(), Processing: beat.ProcessingConfig{ // This input only produces events with basic types so normalization // is not required. 
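
The heart of this commit is the ordered ACK bookkeeping added in acks.go above: each processed object registers how many events it published plus a completion callback, and pipeline ACKs are tallied until the object at the head of the FIFO is fully covered, at which point its callback fires. A minimal, self-contained illustration of that counting scheme follows. The names (ackCounter, pendingObject) are illustrative rather than the patch's types; the sketch omits the mutex and the beat.EventListener wiring, and it subtracts each completed object's count from the running total, the detail a later commit in this series adds to advance().

package main

import "fmt"

// pendingObject pairs the number of events an object produced with the
// callback to run once all of them have been acknowledged.
type pendingObject struct {
	eventCount int
	onComplete func()
}

// ackCounter completes objects strictly in the order they were added, once
// the running total of acknowledged events covers them.
type ackCounter struct {
	pending []pendingObject // FIFO of objects awaiting acknowledgment
	acked   int             // acked events not yet attributed to a completed object
}

func (a *ackCounter) Add(eventCount int, onComplete func()) {
	a.pending = append(a.pending, pendingObject{eventCount, onComplete})
}

func (a *ackCounter) ACK(n int) {
	a.acked += n
	for len(a.pending) > 0 && a.acked >= a.pending[0].eventCount {
		obj := a.pending[0]
		a.acked -= obj.eventCount
		a.pending = a.pending[1:]
		obj.onComplete()
	}
}

func main() {
	var c ackCounter
	c.Add(3, func() { fmt.Println("object A acked: persist its state") })
	c.Add(2, func() { fmt.Println("object B acked: persist its state") })
	c.ACK(4) // covers object A (3 events); object B still pending
	c.ACK(1) // covers object B
}
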
diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index 5cf1d55fb7f..4ca1a36b5e8 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -17,7 +17,6 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common/backoff" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/go-concert/timed" ) @@ -127,8 +126,9 @@ func (in *s3PollerInput) runPoll(ctx context.Context) { } func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) { + var acks s3ACKHandler // Create client for publishing events and receive notification of their ACKs. - client, err := createPipelineClient(in.pipeline) + client, err := createPipelineClient(in.pipeline, &acks) if err != nil { in.log.Errorf("failed to create pipeline client: %v", err.Error()) return @@ -140,20 +140,18 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) for state := range workChan { event := in.s3EventForState(state) - acker := awscommon.NewEventACKTracker(ctx) - objHandler := in.s3ObjectHandler.Create(ctx, acker, event) + objHandler := in.s3ObjectHandler.Create(ctx, event) if objHandler == nil { in.log.Debugw("empty s3 processor (no matching reader configs).", "state", state) continue } - eventChan := make(chan *beat.Event) - go func() { - //for - }() - // Process S3 object (download, parse, create events). - err := objHandler.ProcessS3Object(in.log, eventChan) + publishCount := 0 + err := objHandler.ProcessS3Object(in.log, func(e beat.Event) { + client.Publish(e) + publishCount++ + }) if errors.Is(err, errS3DownloadFailed) { // Download errors are ephemeral. Add a backoff delay, then skip to the // next iteration so we don't mark the object as permanently failed. @@ -163,10 +161,7 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) // Reset the rate limit delay on results that aren't download errors. rateLimitWaiter.Reset() - // Wait for downloaded objects to be ACKed. - acker.Wait() - //objHandler.Wait() - + // Update state, but don't persist it until this object is acknowledged. 
if err != nil { in.log.Errorf("failed processing S3 event for object key %q in bucket %q: %v", state.Key, state.Bucket, err.Error()) @@ -177,14 +172,16 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) state.Stored = true } - // Persist the result, report any errors - err = in.states.AddState(state) - if err != nil { - in.log.Errorf("saving completed object state: %v", err.Error()) - } + // Add the cleanup handling to the acks helper + acks.Add(publishCount, func() { + err := in.states.AddState(state) + if err != nil { + in.log.Errorf("saving completed object state: %v", err.Error()) + } - // Metrics - in.metrics.s3ObjectsAckedTotal.Inc() + // Metrics + in.metrics.s3ObjectsAckedTotal.Inc() + }) } } @@ -249,7 +246,5 @@ func (in *s3PollerInput) s3EventForState(state state) s3EventV2 { func (in *s3PollerInput) createS3ObjectProcessor(ctx context.Context, state state) s3ObjectHandler { event := in.s3EventForState(state) - acker := awscommon.NewEventACKTracker(ctx) - - return in.s3ObjectHandler.Create(ctx, acker, event) + return in.s3ObjectHandler.Create(ctx, event) } diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 3d45a53c177..35f9f46fa21 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -25,7 +25,6 @@ import ( "github.com/elastic/beats/v7/libbeat/reader" "github.com/elastic/beats/v7/libbeat/reader/readfile" "github.com/elastic/beats/v7/libbeat/reader/readfile/encoding" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" ) @@ -40,14 +39,12 @@ type s3ObjectProcessorFactory struct { type s3ObjectProcessor struct { *s3ObjectProcessorFactory - ctx context.Context - // The processor sends decoded events from its object to this channel - eventChan chan *beat.Event - acker *awscommon.EventACKTracker // ACKer tied to the SQS message (multiple S3 readers share an ACKer when the S3 notification event contains more than one S3 object). - readerConfig *readerConfig // Config about how to process the object. - s3Obj s3EventV2 // S3 object information. - s3ObjHash string - s3RequestURL string + ctx context.Context + eventCallback func(beat.Event) + readerConfig *readerConfig // Config about how to process the object. + s3Obj s3EventV2 // S3 object information. + s3ObjHash string + s3RequestURL string s3Metadata map[string]interface{} // S3 object metadata. } @@ -91,7 +88,7 @@ func (f *s3ObjectProcessorFactory) findReaderConfig(key string) *readerConfig { // Create returns a new s3ObjectProcessor. It returns nil when no file selectors // match the S3 object key. -func (f *s3ObjectProcessorFactory) Create(ctx context.Context, ack *awscommon.EventACKTracker, obj s3EventV2) s3ObjectHandler { +func (f *s3ObjectProcessorFactory) Create(ctx context.Context, obj s3EventV2) s3ObjectHandler { readerConfig := f.findReaderConfig(obj.S3.Object.Key) if readerConfig == nil { // No file_selectors are a match, skip. 
@@ -101,25 +98,17 @@ func (f *s3ObjectProcessorFactory) Create(ctx context.Context, ack *awscommon.Ev return &s3ObjectProcessor{ s3ObjectProcessorFactory: f, ctx: ctx, - acker: ack, readerConfig: readerConfig, s3Obj: obj, s3ObjHash: s3ObjectHash(obj), } } -func (p *s3ObjectProcessor) Wait() { - p.acker.Wait() -} - -func (p *s3ObjectProcessor) Done() <-chan struct{} { - return nil -} - -func (p *s3ObjectProcessor) ProcessS3Object(log *logp.Logger, eventChan chan<- *beat.Event) error { +func (p *s3ObjectProcessor) ProcessS3Object(log *logp.Logger, eventCallback func(e beat.Event)) error { if p == nil { return nil } + p.eventCallback = eventCallback log = log.With( "bucket_arn", p.s3Obj.S3.Bucket.Name, "object_key", p.s3Obj.S3.Object.Key) @@ -255,7 +244,7 @@ func (p *s3ObjectProcessor) readJSON(r io.Reader) error { data, _ := item.MarshalJSON() evt := p.createEvent(string(data), offset) - p.eventChan <- &evt + p.eventCallback(evt) } return nil @@ -290,7 +279,7 @@ func (p *s3ObjectProcessor) readJSONSlice(r io.Reader, evtOffset int64) (int64, data, _ := item.MarshalJSON() evt := p.createEvent(string(data), evtOffset) - p.eventChan <- &evt + p.eventCallback(evt) evtOffset++ } @@ -335,7 +324,7 @@ func (p *s3ObjectProcessor) splitEventList(key string, raw json.RawMessage, offs data, _ := item.MarshalJSON() p.s3ObjHash = objHash evt := p.createEvent(string(data), offset+arrayOffset) - p.eventChan <- &evt + p.eventCallback(evt) } return nil @@ -375,7 +364,7 @@ func (p *s3ObjectProcessor) readFile(r io.Reader) error { event := p.createEvent(string(message.Content), offset) event.Fields.DeepUpdate(message.Fields) offset += int64(message.Bytes) - p.eventChan <- &event + p.eventCallback(event) } if errors.Is(err, io.EOF) { diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 3fe0dcf2867..9d7250d5287 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -327,13 +327,16 @@ func (p *sqsS3EventProcessor) processS3Events(ctx context.Context, log *logp.Log var errs []error var handles []s3ObjectHandler for i, event := range s3Events { - s3Processor := p.s3ObjectHandler.Create(ctx, acker, event) + s3Processor := p.s3ObjectHandler.Create(ctx, event) if s3Processor == nil { continue } // Process S3 object (download, parse, create events). 
- if err := s3Processor.ProcessS3Object(log, client); err != nil { + if err := s3Processor.ProcessS3Object(log, func(e beat.Event) { + // hi fae, this section is unfinished + client.Publish(e) + }); err != nil { errs = append(errs, fmt.Errorf( "failed processing S3 event for object key %q in bucket %q (object record %d of %d in SQS notification): %w", event.S3.Object.Key, event.S3.Bucket.Name, i+1, len(s3Events), err)) From d023027f98116ed7fcaa448be0a2313a884c8849 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 22 May 2024 18:25:10 -0400 Subject: [PATCH 03/30] fix loop variable scope --- x-pack/filebeat/input/awss3/acks.go | 8 ++++---- x-pack/filebeat/input/awss3/s3_input.go | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/input/awss3/acks.go b/x-pack/filebeat/input/awss3/acks.go index 9afb23fc72d..88e6f3518dc 100644 --- a/x-pack/filebeat/input/awss3/acks.go +++ b/x-pack/filebeat/input/awss3/acks.go @@ -48,10 +48,12 @@ func (ah *s3ACKHandler) advance() []func() { var callbacks []func() for !ah.pendingACKs.Empty() { nextObj := ah.pendingACKs.First() - if nextObj.publishCount > ah.ackedCount { + if ah.ackedCount < nextObj.publishCount { // This object hasn't been fully acknowledged yet break } + //fmt.Printf("\033[94mhi fae, ack handler finished an object\033[0m\n") + ah.ackedCount -= nextObj.publishCount callbacks = append(callbacks, nextObj.ackCallback) ah.pendingACKs.Remove() } @@ -59,7 +61,5 @@ func (ah *s3ACKHandler) advance() []func() { } func (ah *s3ACKHandler) pipelineEventListener() beat.EventListener { - return acker.TrackingCounter(func(_ int, total int) { - ah.ACK(total) - }) + return acker.TrackingCounter(func(_ int, total int) { ah.ACK(total) }) } diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index 4ca1a36b5e8..b71df1a5b09 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -137,7 +137,8 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) rateLimitWaiter := backoff.NewEqualJitterBackoff(ctx.Done(), 1, 120) - for state := range workChan { + for _state := range workChan { + state := _state event := in.s3EventForState(state) objHandler := in.s3ObjectHandler.Create(ctx, event) @@ -174,6 +175,7 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) // Add the cleanup handling to the acks helper acks.Add(publishCount, func() { + fmt.Printf("\033[94mhi fae, receiving callback for \033[0m\n" err := in.states.AddState(state) if err != nil { in.log.Errorf("saving completed object state: %v", err.Error()) From ed4953a9dca3d64780a6ea459f1628ec43d7dc42 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Fri, 31 May 2024 15:34:25 -0400 Subject: [PATCH 04/30] remove leftover print --- x-pack/filebeat/input/awss3/s3_input.go | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index b71df1a5b09..aa0cb6b4e35 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -175,7 +175,6 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) // Add the cleanup handling to the acks helper acks.Add(publishCount, func() { - fmt.Printf("\033[94mhi fae, receiving callback for \033[0m\n" err := in.states.AddState(state) if err != nil { in.log.Errorf("saving completed object state: %v", err.Error()) From 7e70ea6f27899278fde7fe9b7b2cddcb71e68232 Mon Sep 17 00:00:00 
2001 From: Fae Charlton Date: Thu, 6 Jun 2024 16:03:18 -0400 Subject: [PATCH 05/30] cleanup --- x-pack/filebeat/input/awss3/interfaces.go | 16 ---------------- x-pack/filebeat/input/awss3/sqs_input.go | 2 +- x-pack/filebeat/input/awss3/sqs_s3_event.go | 9 +++++---- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/x-pack/filebeat/input/awss3/interfaces.go b/x-pack/filebeat/input/awss3/interfaces.go index 489adb54b2a..4d2f66b0f95 100644 --- a/x-pack/filebeat/input/awss3/interfaces.go +++ b/x-pack/filebeat/input/awss3/interfaces.go @@ -37,25 +37,9 @@ import ( const s3RequestURLMetadataKey = `x-beat-s3-request-url` type sqsAPI interface { - sqsReceiver - sqsDeleter - sqsVisibilityChanger - sqsAttributeGetter -} - -type sqsReceiver interface { ReceiveMessage(ctx context.Context, maxMessages int) ([]types.Message, error) -} - -type sqsDeleter interface { DeleteMessage(ctx context.Context, msg *types.Message) error -} - -type sqsVisibilityChanger interface { ChangeMessageVisibility(ctx context.Context, msg *types.Message, timeout time.Duration) error -} - -type sqsAttributeGetter interface { GetQueueAttributes(ctx context.Context, attr []types.QueueAttributeName) (map[string]string, error) } diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index 7faa52c2a92..7d05e54b383 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -34,7 +34,7 @@ type sqsReaderInput struct { // Workers send on workRequestChan to indicate they're ready for the next // message, and the reader loop replies on workResponseChan. - workRequestChan chan struct{} + //workRequestChan chan struct{} workResponseChan chan types.Message // workerWg is used to wait on worker goroutines during shutdown diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 9d7250d5287..0fafb450544 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -132,7 +132,10 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message // Start SQS keepalive worker. 
var keepaliveWg sync.WaitGroup keepaliveWg.Add(1) - go p.keepalive(keepaliveCtx, log, &keepaliveWg, msg) + go func() { + defer keepaliveWg.Done() + p.keepalive(keepaliveCtx, log, msg) + }() receiveCount := getSQSReceiveCount(msg.Attributes) if receiveCount == 1 { @@ -195,9 +198,7 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message return fmt.Errorf("failed processing SQS message (it will return to queue after visibility timeout): %w", processingErr) } -func (p *sqsS3EventProcessor) keepalive(ctx context.Context, log *logp.Logger, wg *sync.WaitGroup, msg *types.Message) { - defer wg.Done() - +func (p *sqsS3EventProcessor) keepalive(ctx context.Context, log *logp.Logger, msg *types.Message) { t := time.NewTicker(p.sqsVisibilityTimeout / 2) defer t.Stop() From 2d18b286171e091699148a23100fb7c553bf8aed Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 24 Jun 2024 15:31:05 -0400 Subject: [PATCH 06/30] restore struct field --- x-pack/filebeat/input/awss3/sqs_input.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index 7d05e54b383..7faa52c2a92 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -34,7 +34,7 @@ type sqsReaderInput struct { // Workers send on workRequestChan to indicate they're ready for the next // message, and the reader loop replies on workResponseChan. - //workRequestChan chan struct{} + workRequestChan chan struct{} workResponseChan chan types.Message // workerWg is used to wait on worker goroutines during shutdown From b4cc30e7996b0c39c9267227d2d4059eeaf57b44 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 1 Aug 2024 14:11:43 -0400 Subject: [PATCH 07/30] updates in progress --- libbeat/common/fifo/fifo.go | 13 ++- .../input/awss3/input_benchmark_test.go | 2 +- x-pack/filebeat/input/awss3/interfaces.go | 4 +- x-pack/filebeat/input/awss3/sqs_input.go | 80 +++++++++++---- x-pack/filebeat/input/awss3/sqs_s3_event.go | 98 +++++++++---------- 5 files changed, 122 insertions(+), 75 deletions(-) diff --git a/libbeat/common/fifo/fifo.go b/libbeat/common/fifo/fifo.go index ec5779a7ff4..ceedf7412dc 100644 --- a/libbeat/common/fifo/fifo.go +++ b/libbeat/common/fifo/fifo.go @@ -17,10 +17,6 @@ package fifo -import "errors" - -var errFIFOEmpty = errors.New("tried to read from an empty FIFO queue") - // FIFO is a minimal first-in first-out queue based on a singly linked list. type FIFO[T any] struct { first *node[T] @@ -57,6 +53,15 @@ func (f *FIFO[T]) First() T { return f.first.value } +// Return the first value (if present) and remove it from the queue. +// If the queue is empty a default value is returned, to detect this case +// use f.Empty(). +func (f *FIFO[T]) ConsumeFirst() T { + result := f.First() + f.Remove() + return result +} + // Remove the first entry in the queue. Does nothing if the FIFO is empty. 
func (f *FIFO[T]) Remove() { if f.first != nil { diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index b1061923c1d..ca3c8aa71f7 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -218,7 +218,7 @@ func benchmarkInputSQS(t *testing.T, maxMessagesInflight int) testing.BenchmarkR sqsReader.metrics = newInputMetrics("test_id", monitoring.NewRegistry(), maxMessagesInflight) sqsReader.sqs = newConstantSQS() sqsReader.s3 = newConstantS3(t) - sqsReader.msgHandler, err = sqsReader.createEventProcessor(pipeline) + sqsReader.msgHandler, err = sqsReader.createEventProcessor() require.NoError(t, err, "createEventProcessor must succeed") ctx, cancel := context.WithCancel(context.Background()) diff --git a/x-pack/filebeat/input/awss3/interfaces.go b/x-pack/filebeat/input/awss3/interfaces.go index 4d2f66b0f95..76e8158cae7 100644 --- a/x-pack/filebeat/input/awss3/interfaces.go +++ b/x-pack/filebeat/input/awss3/interfaces.go @@ -48,7 +48,7 @@ type sqsProcessor interface { // given message and is responsible for updating the message's visibility // timeout while it is being processed and for deleting it when processing // completes successfully. - ProcessSQS(ctx context.Context, msg *types.Message) error + ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(e beat.Event)) error } // ------ @@ -88,7 +88,7 @@ type s3ObjectHandlerFactory interface { type s3ObjectHandler interface { // ProcessS3Object downloads the S3 object, parses it, creates events, and - // sends them to the given channel. It returns when processing finishes or + // passes to the given callback. It returns when processing finishes or // when it encounters an unrecoverable error. ProcessS3Object(log *logp.Logger, eventCallback func(e beat.Event)) error diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index 7faa52c2a92..4bdc073af37 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -17,6 +17,7 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" + awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" ) @@ -29,6 +30,10 @@ type sqsReaderInput struct { log *logp.Logger metrics *inputMetrics + // The Beats pipeline, used to create clients for event publication when + // creating the worker goroutines. 
+ pipeline beat.Pipeline + // The expected region based on the queue URL detectedRegion string @@ -83,6 +88,7 @@ func (in *sqsReaderInput) setup( pipeline beat.Pipeline, ) error { in.log = inputContext.Logger.With("queue_url", in.config.QueueURL) + in.pipeline = pipeline in.detectedRegion = getRegionFromQueueURL(in.config.QueueURL, in.config.AWSConfig.Endpoint) if in.config.RegionName != "" { @@ -110,7 +116,7 @@ func (in *sqsReaderInput) setup( in.metrics = newInputMetrics(inputContext.ID, nil, in.config.MaxNumberOfMessages) var err error - in.msgHandler, err = in.createEventProcessor(pipeline) + in.msgHandler, err = in.createEventProcessor() if err != nil { return fmt.Errorf("failed to initialize sqs reader: %w", err) } @@ -163,34 +169,69 @@ func (in *sqsReaderInput) readerLoop(ctx context.Context) { } } -func (in *sqsReaderInput) workerLoop(ctx context.Context) { +type sqsWorker struct { + input *sqsReaderInput + client beat.Client +} + +func (in *sqsReaderInput) newSQSWorker() (*sqsWorker, error) { + // Create a pipeline client scoped to this worker. + client, err := in.pipeline.ConnectWith(beat.ClientConfig{ + EventListener: awscommon.NewEventACKHandler(), + Processing: beat.ProcessingConfig{ + // This input only produces events with basic types so normalization + // is not required. + EventNormalization: boolPtr(false), + }, + }) + if err != nil { + return nil, fmt.Errorf("connecting to pipeline: %w", err) + } + return &sqsWorker{ + input: in, + client: client, + }, nil +} + +func (w *sqsWorker) run(ctx context.Context) { + defer w.client.Close() + for ctx.Err() == nil { // Send a work request select { case <-ctx.Done(): // Shutting down return - case in.workRequestChan <- struct{}{}: + case w.input.workRequestChan <- struct{}{}: } // The request is sent, wait for a response select { case <-ctx.Done(): return - case msg := <-in.workResponseChan: - start := time.Now() - - id := in.metrics.beginSQSWorker() - if err := in.msgHandler.ProcessSQS(ctx, &msg); err != nil { - in.log.Warnw("Failed processing SQS message.", - "error", err, - "message_id", *msg.MessageId, - "elapsed_time_ns", time.Since(start)) - } - in.metrics.endSQSWorker(id) + case msg := <-w.input.workResponseChan: + w.processMessage(ctx, msg) } } } +func (w *sqsWorker) processMessage(ctx context.Context, msg types.Message) { + publishCount := 0 + start := time.Now() + id := w.input.metrics.beginSQSWorker() + if err := w.input.msgHandler.ProcessSQS(ctx, &msg, func(e beat.Event) { + w.client.Publish(e) + publishCount++ + // hi fae, finish this function + }); err != nil { + w.input.log.Warnw("Failed processing SQS message.", + "error", err, + "message_id", *msg.MessageId, + "elapsed_time_ns", time.Since(start)) + } + w.input.metrics.endSQSWorker(id) + +} + func (in *sqsReaderInput) startWorkers(ctx context.Context) { // Start the worker goroutines that will fetch messages via workRequestChan // and workResponseChan until the input shuts down. 
@@ -198,7 +239,12 @@ func (in *sqsReaderInput) startWorkers(ctx context.Context) { in.workerWg.Add(1) go func() { defer in.workerWg.Done() - in.workerLoop(ctx) + worker, err := in.newSQSWorker() + if err != nil { + in.log.Error(err) + return + } + go worker.run(ctx) }() } } @@ -219,7 +265,7 @@ func (in *sqsReaderInput) logConfigSummary() { } } -func (in *sqsReaderInput) createEventProcessor(pipeline beat.Pipeline) (sqsProcessor, error) { +func (in *sqsReaderInput) createEventProcessor() (sqsProcessor, error) { fileSelectors := in.config.getFileSelectors() s3EventHandlerFactory := newS3ObjectProcessorFactory(in.metrics, in.s3, fileSelectors, in.config.BackupConfig) @@ -227,7 +273,7 @@ func (in *sqsReaderInput) createEventProcessor(pipeline beat.Pipeline) (sqsProce if err != nil { return nil, err } - return newSQSS3EventProcessor(in.log.Named("sqs_s3_event"), in.metrics, in.sqs, script, in.config.VisibilityTimeout, in.config.SQSMaxReceiveCount, pipeline, s3EventHandlerFactory), nil + return newSQSS3EventProcessor(in.log.Named("sqs_s3_event"), in.metrics, in.sqs, script, in.config.VisibilityTimeout, in.config.SQSMaxReceiveCount, s3EventHandlerFactory), nil } // Read all pending requests and return their count. If block is true, diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 0fafb450544..0b3d6e1818c 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -20,7 +20,6 @@ import ( "go.uber.org/multierr" "github.com/elastic/beats/v7/libbeat/beat" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" ) @@ -84,15 +83,18 @@ type s3EventV2 struct { } type sqsS3EventProcessor struct { - s3ObjectHandler s3ObjectHandlerFactory + s3HandlerFactory s3ObjectHandlerFactory sqsVisibilityTimeout time.Duration maxReceiveCount int sqs sqsAPI - pipeline beat.Pipeline // Pipeline creates clients for publishing events. log *logp.Logger warnOnce sync.Once metrics *inputMetrics script *script + + // Finalizer callbacks for the returned S3 events, invoked via + // finalizeS3Objects after all events are acknowledged. 
+ s3Finalizers []func() error } func newSQSS3EventProcessor( @@ -102,7 +104,6 @@ func newSQSS3EventProcessor( script *script, sqsVisibilityTimeout time.Duration, maxReceiveCount int, - pipeline beat.Pipeline, s3 s3ObjectHandlerFactory, ) *sqsS3EventProcessor { if metrics == nil { @@ -110,18 +111,23 @@ func newSQSS3EventProcessor( metrics = newInputMetrics("", nil, 0) } return &sqsS3EventProcessor{ - s3ObjectHandler: s3, + s3HandlerFactory: s3, sqsVisibilityTimeout: sqsVisibilityTimeout, maxReceiveCount: maxReceiveCount, sqs: sqs, - pipeline: pipeline, log: log, metrics: metrics, script: script, } } -func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message) error { +type sqsProcessingResult struct { + msg *types.Message + keepaliveCancel context.CancelFunc + processingErr error +} + +func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(beat.Event)) sqsProcessingResult { log := p.log.With( "message_id", *msg.MessageId, "message_receipt_time", time.Now().UTC()) @@ -149,20 +155,30 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message } } - handles, processingErr := p.processS3Events(ctx, log, *msg.Body) + processingErr := p.processS3Events(ctx, log, *msg.Body, eventCallback) + + return sqsProcessingResult{ + msg: msg, + keepaliveCancel: keepaliveCancel, + processingErr: processingErr, + } +} +// Call Done to indicate that all events from this SQS message have been +// acknowledged and it is safe to stop the keepalive routine and +// delete / finalize the message. +func (p sqsProcessingResult) Done() { // Stop keepalive routine before changing visibility. - keepaliveCancel() - keepaliveWg.Wait() + p.keepaliveCancel() // No error. Delete SQS. - if processingErr == nil { + if p.processingErr == nil { if msgDelErr := p.sqs.DeleteMessage(context.Background(), msg); msgDelErr != nil { return fmt.Errorf("failed deleting message from SQS queue (it may be reprocessed): %w", msgDelErr) } p.metrics.sqsMessagesDeletedTotal.Inc() // SQS message finished and deleted, finalize s3 objects - if finalizeErr := p.finalizeS3Objects(handles); finalizeErr != nil { + if finalizeErr := p.finalizeS3Objects(); finalizeErr != nil { return fmt.Errorf("failed finalizing message from SQS queue (manual cleanup is required): %w", finalizeErr) } return nil @@ -291,76 +307,56 @@ func (*sqsS3EventProcessor) isObjectCreatedEvents(event s3EventV2) bool { return event.EventSource == "aws:s3" && strings.HasPrefix(event.EventName, "ObjectCreated:") } -func (p *sqsS3EventProcessor) processS3Events(ctx context.Context, log *logp.Logger, body string) ([]s3ObjectHandler, error) { +func (p *sqsS3EventProcessor) processS3Events( + ctx context.Context, + log *logp.Logger, + body string, + eventCallback func(beat.Event), +) error { s3Events, err := p.getS3Notifications(body) if err != nil { if errors.Is(err, context.Canceled) { // Messages that are in-flight at shutdown should be returned to SQS. - return nil, err + return err } - return nil, &nonRetryableError{err} + return &nonRetryableError{err} } log.Debugf("SQS message contained %d S3 event notifications.", len(s3Events)) defer log.Debug("End processing SQS S3 event notifications.") if len(s3Events) == 0 { - return nil, nil - } - - // Create a pipeline client scoped to this goroutine. 
- client, err := p.pipeline.ConnectWith(beat.ClientConfig{ - EventListener: awscommon.NewEventACKHandler(), - Processing: beat.ProcessingConfig{ - // This input only produces events with basic types so normalization - // is not required. - EventNormalization: boolPtr(false), - }, - }) - if err != nil { - return nil, err + return nil } - defer client.Close() - - // Wait for all events to be ACKed before proceeding. - acker := awscommon.NewEventACKTracker(ctx) - defer acker.Wait() var errs []error - var handles []s3ObjectHandler for i, event := range s3Events { - s3Processor := p.s3ObjectHandler.Create(ctx, event) + s3Processor := p.s3HandlerFactory.Create(ctx, event) if s3Processor == nil { + // A nil result generally means that this object key doesn't match the + // user-configured filters. continue } // Process S3 object (download, parse, create events). - if err := s3Processor.ProcessS3Object(log, func(e beat.Event) { - // hi fae, this section is unfinished - client.Publish(e) - }); err != nil { + if err := s3Processor.ProcessS3Object(log, eventCallback); err != nil { errs = append(errs, fmt.Errorf( "failed processing S3 event for object key %q in bucket %q (object record %d of %d in SQS notification): %w", event.S3.Object.Key, event.S3.Bucket.Name, i+1, len(s3Events), err)) } else { - handles = append(handles, s3Processor) + p.s3Finalizers = append(p.s3Finalizers, s3Processor.FinalizeS3Object) } } - // Make sure all s3 events were processed successfully - if len(handles) == len(s3Events) { - return handles, multierr.Combine(errs...) - } - - return nil, multierr.Combine(errs...) + return multierr.Combine(errs...) } -func (p *sqsS3EventProcessor) finalizeS3Objects(handles []s3ObjectHandler) error { +func (p *sqsS3EventProcessor) finalizeS3Objects() error { var errs []error - for i, handle := range handles { - if err := handle.FinalizeS3Object(); err != nil { + for i, finalize := range p.s3Finalizers { + if err := finalize(); err != nil { errs = append(errs, fmt.Errorf( "failed finalizing S3 event (object record %d of %d in SQS notification): %w", - i+1, len(handles), err)) + i+1, len(p.s3Finalizers), err)) } } return multierr.Combine(errs...) From d4ecc1f113165d37d06a3d38fd3b80f9225839a7 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 1 Aug 2024 15:56:17 -0400 Subject: [PATCH 08/30] leftover file --- x-pack/filebeat/input/awss3/sqs_s3_event.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 0b3d6e1818c..381dbb98dcd 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -91,10 +91,6 @@ type sqsS3EventProcessor struct { warnOnce sync.Once metrics *inputMetrics script *script - - // Finalizer callbacks for the returned S3 events, invoked via - // finalizeS3Objects after all events are acknowledged. - s3Finalizers []func() error } func newSQSS3EventProcessor( @@ -122,9 +118,14 @@ func newSQSS3EventProcessor( } type sqsProcessingResult struct { + processor *sqsS3EventProcessor msg *types.Message keepaliveCancel context.CancelFunc processingErr error + + // Finalizer callbacks for the returned S3 events, invoked via + // finalizeS3Objects after all events are acknowledged. 
+ s3Finalizers []func() error } func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(beat.Event)) sqsProcessingResult { From 64a96d403c00cc2a5dfaa1c9c333ae240c3d500f Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 6 Aug 2024 14:51:03 -0400 Subject: [PATCH 09/30] use a common ack helper in s3 and sqs --- x-pack/filebeat/input/awss3/acks.go | 97 ++++++++++++--------- x-pack/filebeat/input/awss3/interfaces.go | 2 +- x-pack/filebeat/input/awss3/s3.go | 2 +- x-pack/filebeat/input/awss3/s3_input.go | 2 +- x-pack/filebeat/input/awss3/sqs_acks.go | 0 x-pack/filebeat/input/awss3/sqs_input.go | 12 ++- x-pack/filebeat/input/awss3/sqs_s3_event.go | 48 ++++++---- 7 files changed, 96 insertions(+), 67 deletions(-) create mode 100644 x-pack/filebeat/input/awss3/sqs_acks.go diff --git a/x-pack/filebeat/input/awss3/acks.go b/x-pack/filebeat/input/awss3/acks.go index 88e6f3518dc..486c041e00d 100644 --- a/x-pack/filebeat/input/awss3/acks.go +++ b/x-pack/filebeat/input/awss3/acks.go @@ -5,61 +5,76 @@ package awss3 import ( - "sync" - "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common/acker" "github.com/elastic/beats/v7/libbeat/common/fifo" ) -type s3ACKHandler struct { - sync.Mutex - pendingACKs fifo.FIFO[publishedS3Object] - ackedCount int -} +type awsACKHandler struct { + pending fifo.FIFO[pendingACK] + ackedCount int -type publishedS3Object struct { - publishCount int - ackCallback func() + pendingChan chan pendingACK + ackChan chan int } -func (ah *s3ACKHandler) Add(publishCount int, ackCallback func()) { - ah.Lock() - defer ah.Unlock() - ah.pendingACKs.Add(publishedS3Object{ - publishCount: publishCount, - ackCallback: ackCallback, - }) +type pendingACK struct { + eventCount int + ackCallback func() } -func (ah *s3ACKHandler) ACK(count int) { - ah.Lock() - ah.ackedCount += count - callbacks := ah.advance() - ah.Unlock() - for _, c := range callbacks { - c() +func newAWSACKHandler() *awsACKHandler { + handler := &awsACKHandler{ + pendingChan: make(chan pendingACK, 10), + ackChan: make(chan int, 10), } + go handler.run() + return handler } -// Advance the acks list based on the current ackedCount, invoking any -// acknowledgment callbacks for completed objects. -func (ah *s3ACKHandler) advance() []func() { - var callbacks []func() - for !ah.pendingACKs.Empty() { - nextObj := ah.pendingACKs.First() - if ah.ackedCount < nextObj.publishCount { - // This object hasn't been fully acknowledged yet - break - } - //fmt.Printf("\033[94mhi fae, ack handler finished an object\033[0m\n") - ah.ackedCount -= nextObj.publishCount - callbacks = append(callbacks, nextObj.ackCallback) - ah.pendingACKs.Remove() +func (ah *awsACKHandler) Add(eventCount int, ackCallback func()) { + ah.pendingChan <- pendingACK{ + eventCount: eventCount, + ackCallback: ackCallback, } - return callbacks } -func (ah *s3ACKHandler) pipelineEventListener() beat.EventListener { - return acker.TrackingCounter(func(_ int, total int) { ah.ACK(total) }) +func (ah *awsACKHandler) pipelineEventListener() beat.EventListener { + return acker.TrackingCounter(func(_ int, total int) { + // Notify the ack handler goroutine + ah.ackChan <- total + }) +} + +// Listener that handles both incoming metadata and ACK +// confirmations. 
+func (ah *awsACKHandler) run() { + for { + select { + case result, ok := <-ah.pendingChan: + if ok { + ah.pending.Add(result) + } else { + // Channel is closed, reset so we don't receive any more values + ah.pendingChan = nil + } + case count := <-ah.ackChan: + ah.ackedCount += count + } + + // Finalize any objects that are now completed + for !ah.pending.Empty() && ah.ackedCount >= ah.pending.First().eventCount { + result := ah.pending.ConsumeFirst() + ah.ackedCount -= result.eventCount + // Run finalization asynchronously so we don't block the SQS worker + // or the queue by ignoring the ack handler's input channels. Ordering + // is no longer important at this point. + go result.ackCallback() + } + + // If the input is closed and all acks are completed, we're done + if ah.pending.Empty() && ah.pendingChan == nil { + return + } + } } diff --git a/x-pack/filebeat/input/awss3/interfaces.go b/x-pack/filebeat/input/awss3/interfaces.go index 76e8158cae7..6522a9f7574 100644 --- a/x-pack/filebeat/input/awss3/interfaces.go +++ b/x-pack/filebeat/input/awss3/interfaces.go @@ -48,7 +48,7 @@ type sqsProcessor interface { // given message and is responsible for updating the message's visibility // timeout while it is being processed and for deleting it when processing // completes successfully. - ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(e beat.Event)) error + ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(e beat.Event)) sqsProcessingResult } // ------ diff --git a/x-pack/filebeat/input/awss3/s3.go b/x-pack/filebeat/input/awss3/s3.go index f790baee62c..06d1668c3c3 100644 --- a/x-pack/filebeat/input/awss3/s3.go +++ b/x-pack/filebeat/input/awss3/s3.go @@ -33,7 +33,7 @@ func createS3API(ctx context.Context, config config, awsConfig awssdk.Config) (* }, nil } -func createPipelineClient(pipeline beat.Pipeline, acks *s3ACKHandler) (beat.Client, error) { +func createPipelineClient(pipeline beat.Pipeline, acks *awsACKHandler) (beat.Client, error) { return pipeline.ConnectWith(beat.ClientConfig{ EventListener: acks.pipelineEventListener(), Processing: beat.ProcessingConfig{ diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index aa0cb6b4e35..b0cd981e10d 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -126,7 +126,7 @@ func (in *s3PollerInput) runPoll(ctx context.Context) { } func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) { - var acks s3ACKHandler + var acks awsACKHandler // Create client for publishing events and receive notification of their ACKs. 
client, err := createPipelineClient(in.pipeline, &acks) if err != nil { diff --git a/x-pack/filebeat/input/awss3/sqs_acks.go b/x-pack/filebeat/input/awss3/sqs_acks.go new file mode 100644 index 00000000000..e69de29bb2d diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index 4bdc073af37..dbce54236cd 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -17,6 +17,7 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common/fifo" awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" ) @@ -170,8 +171,9 @@ func (in *sqsReaderInput) readerLoop(ctx context.Context) { } type sqsWorker struct { - input *sqsReaderInput - client beat.Client + input *sqsReaderInput + client beat.Client + pendingACKs fifo.FIFO[sqsProcessingResult] } func (in *sqsReaderInput) newSQSWorker() (*sqsWorker, error) { @@ -218,11 +220,13 @@ func (w *sqsWorker) processMessage(ctx context.Context, msg types.Message) { publishCount := 0 start := time.Now() id := w.input.metrics.beginSQSWorker() - if err := w.input.msgHandler.ProcessSQS(ctx, &msg, func(e beat.Event) { + result := w.input.msgHandler.ProcessSQS(ctx, &msg, func(e beat.Event) { w.client.Publish(e) publishCount++ // hi fae, finish this function - }); err != nil { + }) + + if result != nil { w.input.log.Warnw("Failed processing SQS message.", "error", err, "message_id", *msg.MessageId, diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 381dbb98dcd..9fc28a9a504 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -120,14 +120,18 @@ func newSQSS3EventProcessor( type sqsProcessingResult struct { processor *sqsS3EventProcessor msg *types.Message + receiveCount int // How many times this SQS object has been read + eventCount int // How many events were generated from this SQS object keepaliveCancel context.CancelFunc processingErr error // Finalizer callbacks for the returned S3 events, invoked via // finalizeS3Objects after all events are acknowledged. - s3Finalizers []func() error + finalizers []finalizerFunc } +type finalizerFunc func() error + func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(beat.Event)) sqsProcessingResult { log := p.log.With( "message_id", *msg.MessageId, @@ -156,30 +160,35 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message } } - processingErr := p.processS3Events(ctx, log, *msg.Body, eventCallback) + finalizers, processingErr := p.processS3Events(ctx, log, *msg.Body, eventCallback) return sqsProcessingResult{ msg: msg, + receiveCount: receiveCount, keepaliveCancel: keepaliveCancel, processingErr: processingErr, + finalizers: finalizers, } } // Call Done to indicate that all events from this SQS message have been // acknowledged and it is safe to stop the keepalive routine and // delete / finalize the message. -func (p sqsProcessingResult) Done() { +func (r sqsProcessingResult) Done() error { + p := r.processor + processingErr := r.processingErr + // Stop keepalive routine before changing visibility. - p.keepaliveCancel() + r.keepaliveCancel() // No error. Delete SQS. 
- if p.processingErr == nil { - if msgDelErr := p.sqs.DeleteMessage(context.Background(), msg); msgDelErr != nil { + if processingErr == nil { + if msgDelErr := p.sqs.DeleteMessage(context.Background(), r.msg); msgDelErr != nil { return fmt.Errorf("failed deleting message from SQS queue (it may be reprocessed): %w", msgDelErr) } p.metrics.sqsMessagesDeletedTotal.Inc() // SQS message finished and deleted, finalize s3 objects - if finalizeErr := p.finalizeS3Objects(); finalizeErr != nil { + if finalizeErr := r.finalizeS3Objects(); finalizeErr != nil { return fmt.Errorf("failed finalizing message from SQS queue (manual cleanup is required): %w", finalizeErr) } return nil @@ -188,16 +197,16 @@ func (p sqsProcessingResult) Done() { if p.maxReceiveCount > 0 && !errors.Is(processingErr, &nonRetryableError{}) { // Prevent poison pill messages from consuming all workers. Check how // many times this message has been received before making a disposition. - if receiveCount >= p.maxReceiveCount { + if r.receiveCount >= p.maxReceiveCount { processingErr = nonRetryableErrorWrap(fmt.Errorf( "sqs ApproximateReceiveCount <%v> exceeds threshold %v: %w", - receiveCount, p.maxReceiveCount, processingErr)) + r.receiveCount, p.maxReceiveCount, processingErr)) } } // An error that reprocessing cannot correct. Delete SQS. if errors.Is(processingErr, &nonRetryableError{}) { - if msgDelErr := p.sqs.DeleteMessage(context.Background(), msg); msgDelErr != nil { + if msgDelErr := p.sqs.DeleteMessage(context.Background(), r.msg); msgDelErr != nil { return multierr.Combine( fmt.Errorf("failed processing SQS message (attempted to delete message): %w", processingErr), fmt.Errorf("failed deleting message from SQS queue (it may be reprocessed): %w", msgDelErr), @@ -313,23 +322,24 @@ func (p *sqsS3EventProcessor) processS3Events( log *logp.Logger, body string, eventCallback func(beat.Event), -) error { +) ([]finalizerFunc, error) { s3Events, err := p.getS3Notifications(body) if err != nil { if errors.Is(err, context.Canceled) { // Messages that are in-flight at shutdown should be returned to SQS. - return err + return nil, err } - return &nonRetryableError{err} + return nil, &nonRetryableError{err} } log.Debugf("SQS message contained %d S3 event notifications.", len(s3Events)) defer log.Debug("End processing SQS S3 event notifications.") if len(s3Events) == 0 { - return nil + return nil, nil } var errs []error + var finalizers []finalizerFunc for i, event := range s3Events { s3Processor := p.s3HandlerFactory.Create(ctx, event) if s3Processor == nil { @@ -344,20 +354,20 @@ func (p *sqsS3EventProcessor) processS3Events( "failed processing S3 event for object key %q in bucket %q (object record %d of %d in SQS notification): %w", event.S3.Object.Key, event.S3.Bucket.Name, i+1, len(s3Events), err)) } else { - p.s3Finalizers = append(p.s3Finalizers, s3Processor.FinalizeS3Object) + finalizers = append(finalizers, s3Processor.FinalizeS3Object) } } - return multierr.Combine(errs...) + return finalizers, multierr.Combine(errs...) } -func (p *sqsS3EventProcessor) finalizeS3Objects() error { +func (r sqsProcessingResult) finalizeS3Objects() error { var errs []error - for i, finalize := range p.s3Finalizers { + for i, finalize := range r.finalizers { if err := finalize(); err != nil { errs = append(errs, fmt.Errorf( "failed finalizing S3 event (object record %d of %d in SQS notification): %w", - i+1, len(p.s3Finalizers), err)) + i+1, len(r.finalizers), err)) } } return multierr.Combine(errs...) 
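
The common ack helper introduced in this patch decouples event publication from SQS deletion and S3 finalization: each worker registers an (event count, completion callback) pair per message in publish order, and the callback fires only after the pipeline has acknowledged at least that many events. The stand-alone sketch below illustrates that ordering guarantee under simplified assumptions -- it uses a plain slice instead of libbeat's fifo.FIFO, a manual ACK method instead of acker.TrackingCounter, and print statements instead of real SQS deletion, so the names (ackHandler, pending, ACK) are illustrative only and not the Beats API.

package main

import (
	"fmt"
	"sync"
)

// pending is one registered unit of work: how many events it produced and
// what to do once all of them are acknowledged.
type pending struct {
	eventCount int
	callback   func()
}

// ackHandler mirrors the awsACKHandler idea: callbacks run in registration
// order, each only after enough ACKs have accumulated to cover its events.
type ackHandler struct {
	pendingChan chan pending
	ackChan     chan int
	done        sync.WaitGroup
}

func newAckHandler() *ackHandler {
	h := &ackHandler{
		pendingChan: make(chan pending, 10),
		ackChan:     make(chan int, 10),
	}
	h.done.Add(1)
	go h.run()
	return h
}

// Add registers a completed-processing callback for a message that produced
// eventCount events.
func (h *ackHandler) Add(eventCount int, callback func()) {
	h.pendingChan <- pending{eventCount: eventCount, callback: callback}
}

// ACK reports that n more events were acknowledged by the output.
func (h *ackHandler) ACK(n int) { h.ackChan <- n }

// Close stops accepting new work and waits for remaining callbacks to run.
func (h *ackHandler) Close() {
	close(h.pendingChan)
	h.done.Wait()
}

func (h *ackHandler) run() {
	defer h.done.Done()
	var queue []pending
	acked := 0
	pendingChan := h.pendingChan
	for {
		select {
		case p, ok := <-pendingChan:
			if ok {
				queue = append(queue, p)
			} else {
				pendingChan = nil // input closed; keep draining ACKs
			}
		case n := <-h.ackChan:
			acked += n
		}
		// Fire callbacks for every fully acknowledged unit, oldest first.
		for len(queue) > 0 && acked >= queue[0].eventCount {
			acked -= queue[0].eventCount
			queue[0].callback() // e.g. delete the SQS message / finalize the S3 object
			queue = queue[1:]
		}
		if pendingChan == nil && len(queue) == 0 {
			return
		}
	}
}

func main() {
	h := newAckHandler()
	h.Add(2, func() { fmt.Println("message A done") })
	h.Add(1, func() { fmt.Println("message B done") })
	h.ACK(3) // three events acknowledged: A (2 events) completes, then B (1 event)
	h.Close()
}

In the real input (see the sqs_input.go hunks in the following patches), the callback registered this way is sqsProcessingResult.Done, so an SQS message is deleted and its S3 objects finalized only after every event derived from it has been acknowledged.
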
From 7a20250ee7228ab8fd05348a41261d0ed04eb1d9 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 7 Aug 2024 16:42:05 -0400 Subject: [PATCH 10/30] in progress --- x-pack/filebeat/input/awss3/acks.go | 6 ++++++ x-pack/filebeat/input/awss3/sqs_input.go | 22 +++++++++++---------- x-pack/filebeat/input/awss3/sqs_s3_event.go | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/x-pack/filebeat/input/awss3/acks.go b/x-pack/filebeat/input/awss3/acks.go index 486c041e00d..bf6b16daac4 100644 --- a/x-pack/filebeat/input/awss3/acks.go +++ b/x-pack/filebeat/input/awss3/acks.go @@ -39,6 +39,12 @@ func (ah *awsACKHandler) Add(eventCount int, ackCallback func()) { } } +// Called when a worker is closing, to indicate to the ack handler that it +// should shut down as soon as the current pending list is acknowledged. +func (ah *awsACKHandler) Close() { + close(ah.pendingChan) +} + func (ah *awsACKHandler) pipelineEventListener() beat.EventListener { return acker.TrackingCounter(func(_ int, total int) { // Notify the ack handler goroutine diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index dbce54236cd..fb9fd2b09bb 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -17,8 +17,6 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" - "github.com/elastic/beats/v7/libbeat/common/fifo" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" ) @@ -171,15 +169,16 @@ func (in *sqsReaderInput) readerLoop(ctx context.Context) { } type sqsWorker struct { - input *sqsReaderInput - client beat.Client - pendingACKs fifo.FIFO[sqsProcessingResult] + input *sqsReaderInput + client beat.Client + ackHandler *awsACKHandler } func (in *sqsReaderInput) newSQSWorker() (*sqsWorker, error) { // Create a pipeline client scoped to this worker. + ackHandler := newAWSACKHandler() client, err := in.pipeline.ConnectWith(beat.ClientConfig{ - EventListener: awscommon.NewEventACKHandler(), + EventListener: ackHandler.pipelineEventListener(), Processing: beat.ProcessingConfig{ // This input only produces events with basic types so normalization // is not required. 
@@ -190,8 +189,9 @@ func (in *sqsReaderInput) newSQSWorker() (*sqsWorker, error) { return nil, fmt.Errorf("connecting to pipeline: %w", err) } return &sqsWorker{ - input: in, - client: client, + input: in, + client: client, + ackHandler: ackHandler, }, nil } @@ -226,14 +226,16 @@ func (w *sqsWorker) processMessage(ctx context.Context, msg types.Message) { // hi fae, finish this function }) - if result != nil { + if result.processingErr != nil { w.input.log.Warnw("Failed processing SQS message.", - "error", err, + "error", result.processingErr, "message_id", *msg.MessageId, "elapsed_time_ns", time.Since(start)) } w.input.metrics.endSQSWorker(id) + // Add this result's Done callback to the pending ACKs list + w.ackHandler.Add(publishCount, result.Done) } func (in *sqsReaderInput) startWorkers(ctx context.Context) { diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 9fc28a9a504..25518f8bf07 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -174,7 +174,7 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message // Call Done to indicate that all events from this SQS message have been // acknowledged and it is safe to stop the keepalive routine and // delete / finalize the message. -func (r sqsProcessingResult) Done() error { +func (r sqsProcessingResult) Done() { p := r.processor processingErr := r.processingErr From 5282671942b0b808e5888a8360863a6e359d9fa0 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Fri, 30 Aug 2024 16:14:56 -0400 Subject: [PATCH 11/30] test updates --- .../input/awss3/input_benchmark_test.go | 4 +- .../filebeat/input/awss3/s3_objects_test.go | 26 +++----- x-pack/filebeat/input/awss3/s3_test.go | 8 +-- x-pack/filebeat/input/awss3/sqs_s3_event.go | 1 + .../filebeat/input/awss3/sqs_s3_event_test.go | 62 +++++++------------ 5 files changed, 39 insertions(+), 62 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index 6998b9af6b8..7c1b69103ed 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -209,7 +209,6 @@ file_selectors: func benchmarkInputSQS(t *testing.T, maxMessagesInflight int) testing.BenchmarkResult { return testing.Benchmark(func(b *testing.B) { var err error - pipeline := &fakePipeline{} conf := makeBenchmarkConfig(t) conf.MaxNumberOfMessages = maxMessagesInflight @@ -303,6 +302,7 @@ func benchmarkInputS3(t *testing.T, numberOfWorkers int) testing.BenchmarkResult client := pubtest.NewChanClientWithCallback(100, func(event beat.Event) { event.Private.(*awscommon.EventACKTracker).ACK() }) + pipeline := pubtest.PublisherWithClient(client) defer func() { _ = client.Close() @@ -344,7 +344,7 @@ func benchmarkInputS3(t *testing.T, numberOfWorkers int) testing.BenchmarkResult config: config, metrics: metrics, s3: s3API, - client: client, + pipeline: pipeline, s3ObjectHandler: s3EventHandlerFactory, states: states, provider: "provider", diff --git a/x-pack/filebeat/input/awss3/s3_objects_test.go b/x-pack/filebeat/input/awss3/s3_objects_test.go index 670aae3ecc5..45dd03c47c4 100644 --- a/x-pack/filebeat/input/awss3/s3_objects_test.go +++ b/x-pack/filebeat/input/awss3/s3_objects_test.go @@ -148,7 +148,6 @@ func TestS3ObjectProcessor(t *testing.T) { ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := 
NewMockBeatClient(ctrl) s3Event := newS3Event("log.txt") @@ -157,8 +156,7 @@ func TestS3ObjectProcessor(t *testing.T) { Return(nil, errFakeConnectivityFailure) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) - ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) + err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object(logp.NewLogger(inputName), func(_ beat.Event) {}) require.Error(t, err) assert.True(t, errors.Is(err, errS3DownloadFailed), "expected errS3DownloadFailed") }) @@ -170,7 +168,6 @@ func TestS3ObjectProcessor(t *testing.T) { ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := NewMockBeatClient(ctrl) s3Event := newS3Event("log.txt") @@ -179,8 +176,7 @@ func TestS3ObjectProcessor(t *testing.T) { Return(nil, nil) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) - ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) + err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object(logp.NewLogger(inputName), func(_ beat.Event) {}) require.Error(t, err) }) @@ -206,8 +202,7 @@ func TestS3ObjectProcessor(t *testing.T) { ) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) - ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) + err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object(logp.NewLogger(inputName), func(_ beat.Event) {}) require.NoError(t, err) }) @@ -218,7 +213,6 @@ func TestS3ObjectProcessor(t *testing.T) { ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := NewMockBeatClient(ctrl) s3Event, _ := newS3Object(t, "testdata/log.txt", "") backupCfg := backupConfig{ @@ -232,8 +226,7 @@ func TestS3ObjectProcessor(t *testing.T) { ) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupCfg) - ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).FinalizeS3Object() + err := s3ObjProc.Create(ctx, s3Event).FinalizeS3Object() require.NoError(t, err) }) @@ -244,7 +237,6 @@ func TestS3ObjectProcessor(t *testing.T) { ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := NewMockBeatClient(ctrl) s3Event, _ := newS3Object(t, "testdata/log.txt", "") backupCfg := backupConfig{ @@ -262,8 +254,7 @@ func TestS3ObjectProcessor(t *testing.T) { ) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupCfg) - ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).FinalizeS3Object() + err := s3ObjProc.Create(ctx, s3Event).FinalizeS3Object() require.NoError(t, err) }) @@ -274,7 +265,6 @@ func TestS3ObjectProcessor(t *testing.T) { ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := NewMockBeatClient(ctrl) s3Event, _ := newS3Object(t, "testdata/log.txt", "") backupCfg := backupConfig{ @@ -289,8 +279,7 @@ func TestS3ObjectProcessor(t *testing.T) { ) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupCfg) - ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).FinalizeS3Object() + err := s3ObjProc.Create(ctx, s3Event).FinalizeS3Object() require.NoError(t, 
err) }) @@ -336,7 +325,8 @@ func _testProcessS3Object(t testing.TB, file, contentType string, numEvents int, s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, selectors, backupConfig{}) ack := awscommon.NewEventACKTracker(ctx) - err := s3ObjProc.Create(ctx, mockPublisher, ack, s3Event).ProcessS3Object(logp.NewLogger(inputName)) + err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object( + logp.NewLogger(inputName), func(_ beat.Event) {}) if !expectErr { require.NoError(t, err) diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go index 1892ad78622..11f12ee3a24 100644 --- a/x-pack/filebeat/input/awss3/s3_test.go +++ b/x-pack/filebeat/input/awss3/s3_test.go @@ -36,7 +36,7 @@ func TestS3Poller(t *testing.T) { defer ctrl.Finish() mockAPI := NewMockS3API(ctrl) mockPager := NewMockS3Pager(ctrl) - mockPublisher := NewMockBeatClient(ctrl) + mockPipeline := NewMockBeatPipeline(ctrl) gomock.InOrder( mockAPI.EXPECT(). @@ -139,7 +139,7 @@ func TestS3Poller(t *testing.T) { RegionName: "region", }, s3: mockAPI, - client: mockPublisher, + pipeline: mockPipeline, s3ObjectHandler: s3ObjProc, states: states, provider: "provider", @@ -162,7 +162,7 @@ func TestS3Poller(t *testing.T) { mockS3 := NewMockS3API(ctrl) mockErrorPager := NewMockS3Pager(ctrl) mockSuccessPager := NewMockS3Pager(ctrl) - mockPublisher := NewMockBeatClient(ctrl) + mockPipeline := NewMockBeatPipeline(ctrl) gomock.InOrder( // Initial ListObjectPaginator gets an error. @@ -277,7 +277,7 @@ func TestS3Poller(t *testing.T) { RegionName: "region", }, s3: mockS3, - client: mockPublisher, + pipeline: mockPipeline, s3ObjectHandler: s3ObjProc, states: states, provider: "provider", diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 25518f8bf07..d0401c284aa 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -118,6 +118,7 @@ func newSQSS3EventProcessor( } type sqsProcessingResult struct { + log *logp.Logger processor *sqsS3EventProcessor msg *types.Message receiveCount int // How many times this SQS object has been read diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event_test.go b/x-pack/filebeat/input/awss3/sqs_s3_event_test.go index 65552525136..8d43824b362 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event_test.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event_test.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "sync" "testing" "time" @@ -50,8 +49,9 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), ) - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockBeatPipeline, mockS3HandlerFactory) - require.NoError(t, p.ProcessSQS(ctx, &msg)) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) + result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) + require.NoError(t, result.processingErr) }) t.Run("invalid SQS JSON body does not retry", func(t *testing.T) { @@ -62,7 +62,6 @@ func TestSQSS3EventProcessor(t *testing.T) { defer ctrl.Finish() mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) invalidBodyMsg := newSQSMessage(newS3Event("log.json")) body := *invalidBodyMsg.Body @@ -73,10 +72,10 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&invalidBodyMsg)).Return(nil), ) 
- p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockBeatPipeline, mockS3HandlerFactory) - err := p.ProcessSQS(ctx, &invalidBodyMsg) - require.Error(t, err) - t.Log(err) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) + result := p.ProcessSQS(ctx, &invalidBodyMsg, func(_ beat.Event) {}) + require.Error(t, result.processingErr) + t.Log(result.processingErr) }) t.Run("zero S3 events in body", func(t *testing.T) { @@ -87,7 +86,6 @@ func TestSQSS3EventProcessor(t *testing.T) { defer ctrl.Finish() mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) emptyRecordsMsg := newSQSMessage([]s3EventV2{}...) @@ -95,8 +93,9 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&emptyRecordsMsg)).Return(nil), ) - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockBeatPipeline, mockS3HandlerFactory) - require.NoError(t, p.ProcessSQS(ctx, &emptyRecordsMsg)) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) + result := p.ProcessSQS(ctx, &emptyRecordsMsg, func(_ beat.Event) {}) + require.NoError(t, result.processingErr) }) t.Run("visibility is extended after half expires", func(t *testing.T) { @@ -111,12 +110,10 @@ func TestSQSS3EventProcessor(t *testing.T) { mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) mockS3Handler := NewMockS3ObjectHandler(ctrl) mockClient := NewMockBeatClient(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) mockAPI.EXPECT().ChangeMessageVisibility(gomock.Any(), gomock.Eq(&msg), gomock.Eq(visibilityTimeout)).AnyTimes().Return(nil) gomock.InOrder( - mockBeatPipeline.EXPECT().ConnectWith(gomock.Any()).Return(mockClient, nil), mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()). 
Do(func(ctx context.Context, _ *logp.Logger, _ beat.Client, _ *awscommon.EventACKTracker, _ s3EventV2) { require.NoError(t, timed.Wait(ctx, 5*visibilityTimeout)) @@ -127,8 +124,9 @@ func TestSQSS3EventProcessor(t *testing.T) { mockS3Handler.EXPECT().FinalizeS3Object().Return(nil), ) - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, visibilityTimeout, 5, mockBeatPipeline, mockS3HandlerFactory) - require.NoError(t, p.ProcessSQS(ctx, &msg)) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, visibilityTimeout, 5, mockS3HandlerFactory) + result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) + require.NoError(t, result.processingErr) }) t.Run("message returns to queue on error", func(t *testing.T) { @@ -140,20 +138,16 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) mockS3Handler := NewMockS3ObjectHandler(ctrl) - mockClient := NewMockBeatClient(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) gomock.InOrder( - mockBeatPipeline.EXPECT().ConnectWith(gomock.Any()).Return(mockClient, nil), mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(mockS3Handler), mockS3Handler.EXPECT().ProcessS3Object().Return(errors.New("fake connectivity problem")), - mockClient.EXPECT().Close(), ) - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockBeatPipeline, mockS3HandlerFactory) - err := p.ProcessSQS(ctx, &msg) - t.Log(err) - require.Error(t, err) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) + result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) + t.Log(result.processingErr) + require.Error(t, result.processingErr) }) t.Run("message is deleted after multiple receives", func(t *testing.T) { @@ -165,8 +159,6 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) mockS3Handler := NewMockS3ObjectHandler(ctrl) - mockClient := NewMockBeatClient(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) msg := msg msg.Attributes = map[string]string{ @@ -174,17 +166,15 @@ func TestSQSS3EventProcessor(t *testing.T) { } gomock.InOrder( - mockBeatPipeline.EXPECT().ConnectWith(gomock.Any()).Return(mockClient, nil), mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(mockS3Handler), mockS3Handler.EXPECT().ProcessS3Object().Return(errors.New("fake connectivity problem")), - mockClient.EXPECT().Close(), mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), ) - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockBeatPipeline, mockS3HandlerFactory) - err := p.ProcessSQS(ctx, &msg) - t.Log(err) - require.Error(t, err) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) + result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) + t.Log(result.eventCount) + require.Error(t, result.processingErr) }) } @@ -222,16 +212,12 @@ func TestSqsProcessor_keepalive(t *testing.T) { defer ctrl.Finish() mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) mockAPI.EXPECT().ChangeMessageVisibility(gomock.Any(), gomock.Eq(&msg), gomock.Eq(visibilityTimeout)). 
Times(1).Return(tc.Err) - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, visibilityTimeout, 5, mockBeatPipeline, mockS3HandlerFactory) - var wg sync.WaitGroup - wg.Add(1) - p.keepalive(ctx, p.log, &wg, &msg) - wg.Wait() + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, visibilityTimeout, 5, mockS3HandlerFactory) + p.keepalive(ctx, p.log, &msg) }) } } @@ -239,7 +225,7 @@ func TestSqsProcessor_keepalive(t *testing.T) { func TestSqsProcessor_getS3Notifications(t *testing.T) { logp.TestingSetup() - p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, nil, nil, time.Minute, 5, nil, nil) + p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, nil, nil, time.Minute, 5, nil) t.Run("s3 key is url unescaped", func(t *testing.T) { msg := newSQSMessage(newS3Event("Happy+Face.jpg")) From 1548ac2396a5054085cabd65601eb88a1d7a025d Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 5 Sep 2024 17:42:02 -0400 Subject: [PATCH 12/30] fix/update error SQS error handling logic --- x-pack/filebeat/input/awss3/sqs_acks.go | 0 x-pack/filebeat/input/awss3/sqs_input.go | 19 ++++++------- x-pack/filebeat/input/awss3/sqs_s3_event.go | 30 ++++++++++----------- 3 files changed, 23 insertions(+), 26 deletions(-) delete mode 100644 x-pack/filebeat/input/awss3/sqs_acks.go diff --git a/x-pack/filebeat/input/awss3/sqs_acks.go b/x-pack/filebeat/input/awss3/sqs_acks.go deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index 4a24ba0161a..85ead4d68f0 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -8,7 +8,6 @@ import ( "context" "fmt" "sync" - "time" awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" @@ -195,6 +194,7 @@ func (in *sqsReaderInput) newSQSWorker() (*sqsWorker, error) { func (w *sqsWorker) run(ctx context.Context) { defer w.client.Close() + defer w.ackHandler.Close() for ctx.Err() == nil { // Send a work request @@ -216,24 +216,21 @@ func (w *sqsWorker) run(ctx context.Context) { func (w *sqsWorker) processMessage(ctx context.Context, msg types.Message) { publishCount := 0 - start := time.Now() id := w.input.metrics.beginSQSWorker() result := w.input.msgHandler.ProcessSQS(ctx, &msg, func(e beat.Event) { w.client.Publish(e) publishCount++ - // hi fae, finish this function }) - if result.processingErr != nil { - w.input.log.Warnw("Failed processing SQS message.", - "error", result.processingErr, - "message_id", *msg.MessageId, - "elapsed_time_ns", time.Since(start)) + if publishCount == 0 { + // No events made it through (probably an error state), wrap up immediately + result.Done() + } else { + // Add this result's Done callback to the pending ACKs list + w.ackHandler.Add(publishCount, result.Done) } - w.input.metrics.endSQSWorker(id) - // Add this result's Done callback to the pending ACKs list - w.ackHandler.Add(publishCount, result.Done) + w.input.metrics.endSQSWorker(id) } func (in *sqsReaderInput) startWorkers(ctx context.Context) { diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index d0401c284aa..c917af9a92a 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -164,6 +164,7 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message finalizers, processingErr := p.processS3Events(ctx, log, *msg.Body, eventCallback) return 
sqsProcessingResult{ + log: p.log, msg: msg, receiveCount: receiveCount, keepaliveCancel: keepaliveCancel, @@ -185,36 +186,35 @@ func (r sqsProcessingResult) Done() { // No error. Delete SQS. if processingErr == nil { if msgDelErr := p.sqs.DeleteMessage(context.Background(), r.msg); msgDelErr != nil { - return fmt.Errorf("failed deleting message from SQS queue (it may be reprocessed): %w", msgDelErr) + p.log.Errorf("failed deleting message from SQS queue (it may be reprocessed): %v", msgDelErr.Error()) + return } p.metrics.sqsMessagesDeletedTotal.Inc() // SQS message finished and deleted, finalize s3 objects if finalizeErr := r.finalizeS3Objects(); finalizeErr != nil { - return fmt.Errorf("failed finalizing message from SQS queue (manual cleanup is required): %w", finalizeErr) + p.log.Errorf("failed finalizing message from SQS queue (manual cleanup is required): %v", finalizeErr.Error()) } - return nil + return } - if p.maxReceiveCount > 0 && !errors.Is(processingErr, &nonRetryableError{}) { + if p.maxReceiveCount > 0 && r.receiveCount >= p.maxReceiveCount { // Prevent poison pill messages from consuming all workers. Check how // many times this message has been received before making a disposition. - if r.receiveCount >= p.maxReceiveCount { - processingErr = nonRetryableErrorWrap(fmt.Errorf( - "sqs ApproximateReceiveCount <%v> exceeds threshold %v: %w", - r.receiveCount, p.maxReceiveCount, processingErr)) - } + processingErr = nonRetryableErrorWrap(fmt.Errorf( + "sqs ApproximateReceiveCount <%v> exceeds threshold %v: %w", + r.receiveCount, p.maxReceiveCount, processingErr)) } // An error that reprocessing cannot correct. Delete SQS. if errors.Is(processingErr, &nonRetryableError{}) { if msgDelErr := p.sqs.DeleteMessage(context.Background(), r.msg); msgDelErr != nil { - return multierr.Combine( - fmt.Errorf("failed processing SQS message (attempted to delete message): %w", processingErr), - fmt.Errorf("failed deleting message from SQS queue (it may be reprocessed): %w", msgDelErr), - ) + p.log.Errorf("failed processing SQS message (attempted to delete message): %v", processingErr.Error()) + p.log.Errorf("failed deleting message from SQS queue (it may be reprocessed): %v", msgDelErr.Error()) + return } p.metrics.sqsMessagesDeletedTotal.Inc() - return fmt.Errorf("failed processing SQS message (message was deleted): %w", processingErr) + p.log.Errorf("failed processing SQS message (message was deleted): %w", processingErr) + return } // An error that may be resolved by letting the visibility timeout @@ -222,7 +222,7 @@ func (r sqsProcessingResult) Done() { // queue is enabled then the message will eventually placed on the DLQ // after maximum receives is reached. 
p.metrics.sqsMessagesReturnedTotal.Inc() - return fmt.Errorf("failed processing SQS message (it will return to queue after visibility timeout): %w", processingErr) + p.log.Errorf("failed processing SQS message (it will return to queue after visibility timeout): %w", processingErr) } func (p *sqsS3EventProcessor) keepalive(ctx context.Context, log *logp.Logger, msg *types.Message) { From 2358241fa26d7fd9f9664c4e2fc3d3a26be9b1fe Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 5 Sep 2024 17:50:51 -0400 Subject: [PATCH 13/30] close ack handler in s3 worker --- x-pack/filebeat/input/awss3/s3_input.go | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index b0cd981e10d..1a95b870f77 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -134,6 +134,7 @@ func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) return } defer client.Close() + defer acks.Close() rateLimitWaiter := backoff.NewEqualJitterBackoff(ctx.Done(), 1, 120) From 8c447bc74b925d0db6e46b332db2994be36e8ff8 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 1 Oct 2024 15:54:24 -0400 Subject: [PATCH 14/30] Pass through all processing parameters --- x-pack/filebeat/input/awss3/sqs_s3_event.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index c917af9a92a..bf4a9466f3e 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -161,12 +161,18 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message } } - finalizers, processingErr := p.processS3Events(ctx, log, *msg.Body, eventCallback) + eventCount := 0 + finalizers, processingErr := p.processS3Events(ctx, log, *msg.Body, func(e beat.Event) { + eventCount++ + eventCallback(e) + }) return sqsProcessingResult{ log: p.log, msg: msg, + processor: p, receiveCount: receiveCount, + eventCount: eventCount, keepaliveCancel: keepaliveCancel, processingErr: processingErr, finalizers: finalizers, From 8cde3262bb90d51081f6b54298216841169688b8 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 3 Oct 2024 16:10:14 -0400 Subject: [PATCH 15/30] regenerate mock interfaces --- .../input/awss3/mock_interfaces_test.go | 189 ++---------------- 1 file changed, 13 insertions(+), 176 deletions(-) diff --git a/x-pack/filebeat/input/awss3/mock_interfaces_test.go b/x-pack/filebeat/input/awss3/mock_interfaces_test.go index ccae48a59b2..086ca34136f 100644 --- a/x-pack/filebeat/input/awss3/mock_interfaces_test.go +++ b/x-pack/filebeat/input/awss3/mock_interfaces_test.go @@ -18,7 +18,6 @@ import ( gomock "github.com/golang/mock/gomock" beat "github.com/elastic/beats/v7/libbeat/beat" - aws "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" logp "github.com/elastic/elastic-agent-libs/logp" ) @@ -103,156 +102,6 @@ func (mr *MockSQSAPIMockRecorder) ReceiveMessage(ctx, maxMessages interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReceiveMessage", reflect.TypeOf((*MockSQSAPI)(nil).ReceiveMessage), ctx, maxMessages) } -// MocksqsReceiver is a mock of sqsReceiver interface. -type MocksqsReceiver struct { - ctrl *gomock.Controller - recorder *MocksqsReceiverMockRecorder -} - -// MocksqsReceiverMockRecorder is the mock recorder for MocksqsReceiver. 
-type MocksqsReceiverMockRecorder struct { - mock *MocksqsReceiver -} - -// NewMocksqsReceiver creates a new mock instance. -func NewMocksqsReceiver(ctrl *gomock.Controller) *MocksqsReceiver { - mock := &MocksqsReceiver{ctrl: ctrl} - mock.recorder = &MocksqsReceiverMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MocksqsReceiver) EXPECT() *MocksqsReceiverMockRecorder { - return m.recorder -} - -// ReceiveMessage mocks base method. -func (m *MocksqsReceiver) ReceiveMessage(ctx context.Context, maxMessages int) ([]types.Message, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ReceiveMessage", ctx, maxMessages) - ret0, _ := ret[0].([]types.Message) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// ReceiveMessage indicates an expected call of ReceiveMessage. -func (mr *MocksqsReceiverMockRecorder) ReceiveMessage(ctx, maxMessages interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReceiveMessage", reflect.TypeOf((*MocksqsReceiver)(nil).ReceiveMessage), ctx, maxMessages) -} - -// MocksqsDeleter is a mock of sqsDeleter interface. -type MocksqsDeleter struct { - ctrl *gomock.Controller - recorder *MocksqsDeleterMockRecorder -} - -// MocksqsDeleterMockRecorder is the mock recorder for MocksqsDeleter. -type MocksqsDeleterMockRecorder struct { - mock *MocksqsDeleter -} - -// NewMocksqsDeleter creates a new mock instance. -func NewMocksqsDeleter(ctrl *gomock.Controller) *MocksqsDeleter { - mock := &MocksqsDeleter{ctrl: ctrl} - mock.recorder = &MocksqsDeleterMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MocksqsDeleter) EXPECT() *MocksqsDeleterMockRecorder { - return m.recorder -} - -// DeleteMessage mocks base method. -func (m *MocksqsDeleter) DeleteMessage(ctx context.Context, msg *types.Message) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DeleteMessage", ctx, msg) - ret0, _ := ret[0].(error) - return ret0 -} - -// DeleteMessage indicates an expected call of DeleteMessage. -func (mr *MocksqsDeleterMockRecorder) DeleteMessage(ctx, msg interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteMessage", reflect.TypeOf((*MocksqsDeleter)(nil).DeleteMessage), ctx, msg) -} - -// MocksqsVisibilityChanger is a mock of sqsVisibilityChanger interface. -type MocksqsVisibilityChanger struct { - ctrl *gomock.Controller - recorder *MocksqsVisibilityChangerMockRecorder -} - -// MocksqsVisibilityChangerMockRecorder is the mock recorder for MocksqsVisibilityChanger. -type MocksqsVisibilityChangerMockRecorder struct { - mock *MocksqsVisibilityChanger -} - -// NewMocksqsVisibilityChanger creates a new mock instance. -func NewMocksqsVisibilityChanger(ctrl *gomock.Controller) *MocksqsVisibilityChanger { - mock := &MocksqsVisibilityChanger{ctrl: ctrl} - mock.recorder = &MocksqsVisibilityChangerMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MocksqsVisibilityChanger) EXPECT() *MocksqsVisibilityChangerMockRecorder { - return m.recorder -} - -// ChangeMessageVisibility mocks base method. 
-func (m *MocksqsVisibilityChanger) ChangeMessageVisibility(ctx context.Context, msg *types.Message, timeout time.Duration) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ChangeMessageVisibility", ctx, msg, timeout) - ret0, _ := ret[0].(error) - return ret0 -} - -// ChangeMessageVisibility indicates an expected call of ChangeMessageVisibility. -func (mr *MocksqsVisibilityChangerMockRecorder) ChangeMessageVisibility(ctx, msg, timeout interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ChangeMessageVisibility", reflect.TypeOf((*MocksqsVisibilityChanger)(nil).ChangeMessageVisibility), ctx, msg, timeout) -} - -// MocksqsAttributeGetter is a mock of sqsAttributeGetter interface. -type MocksqsAttributeGetter struct { - ctrl *gomock.Controller - recorder *MocksqsAttributeGetterMockRecorder -} - -// MocksqsAttributeGetterMockRecorder is the mock recorder for MocksqsAttributeGetter. -type MocksqsAttributeGetterMockRecorder struct { - mock *MocksqsAttributeGetter -} - -// NewMocksqsAttributeGetter creates a new mock instance. -func NewMocksqsAttributeGetter(ctrl *gomock.Controller) *MocksqsAttributeGetter { - mock := &MocksqsAttributeGetter{ctrl: ctrl} - mock.recorder = &MocksqsAttributeGetterMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MocksqsAttributeGetter) EXPECT() *MocksqsAttributeGetterMockRecorder { - return m.recorder -} - -// GetQueueAttributes mocks base method. -func (m *MocksqsAttributeGetter) GetQueueAttributes(ctx context.Context, attr []types.QueueAttributeName) (map[string]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetQueueAttributes", ctx, attr) - ret0, _ := ret[0].(map[string]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetQueueAttributes indicates an expected call of GetQueueAttributes. -func (mr *MocksqsAttributeGetterMockRecorder) GetQueueAttributes(ctx, attr interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetQueueAttributes", reflect.TypeOf((*MocksqsAttributeGetter)(nil).GetQueueAttributes), ctx, attr) -} - // MockSQSProcessor is a mock of sqsProcessor interface. type MockSQSProcessor struct { ctrl *gomock.Controller @@ -277,17 +126,17 @@ func (m *MockSQSProcessor) EXPECT() *MockSQSProcessorMockRecorder { } // ProcessSQS mocks base method. -func (m *MockSQSProcessor) ProcessSQS(ctx context.Context, msg *types.Message) error { +func (m *MockSQSProcessor) ProcessSQS(ctx context.Context, msg *types.Message, eventCallback func(beat.Event)) sqsProcessingResult { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ProcessSQS", ctx, msg) - ret0, _ := ret[0].(error) + ret := m.ctrl.Call(m, "ProcessSQS", ctx, msg, eventCallback) + ret0, _ := ret[0].(sqsProcessingResult) return ret0 } // ProcessSQS indicates an expected call of ProcessSQS. -func (mr *MockSQSProcessorMockRecorder) ProcessSQS(ctx, msg interface{}) *gomock.Call { +func (mr *MockSQSProcessorMockRecorder) ProcessSQS(ctx, msg, eventCallback interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ProcessSQS", reflect.TypeOf((*MockSQSProcessor)(nil).ProcessSQS), ctx, msg) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ProcessSQS", reflect.TypeOf((*MockSQSProcessor)(nil).ProcessSQS), ctx, msg, eventCallback) } // MockS3API is a mock of s3API interface. 
@@ -581,17 +430,17 @@ func (m *MockS3ObjectHandlerFactory) EXPECT() *MockS3ObjectHandlerFactoryMockRec } // Create mocks base method. -func (m *MockS3ObjectHandlerFactory) Create(ctx context.Context, log *logp.Logger, client beat.Client, acker *aws.EventACKTracker, obj s3EventV2) s3ObjectHandler { +func (m *MockS3ObjectHandlerFactory) Create(ctx context.Context, obj s3EventV2) s3ObjectHandler { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Create", ctx, log, client, acker, obj) + ret := m.ctrl.Call(m, "Create", ctx, obj) ret0, _ := ret[0].(s3ObjectHandler) return ret0 } // Create indicates an expected call of Create. -func (mr *MockS3ObjectHandlerFactoryMockRecorder) Create(ctx, log, client, acker, obj interface{}) *gomock.Call { +func (mr *MockS3ObjectHandlerFactoryMockRecorder) Create(ctx, obj interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Create", reflect.TypeOf((*MockS3ObjectHandlerFactory)(nil).Create), ctx, log, client, acker, obj) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Create", reflect.TypeOf((*MockS3ObjectHandlerFactory)(nil).Create), ctx, obj) } // MockS3ObjectHandler is a mock of s3ObjectHandler interface. @@ -632,27 +481,15 @@ func (mr *MockS3ObjectHandlerMockRecorder) FinalizeS3Object() *gomock.Call { } // ProcessS3Object mocks base method. -func (m *MockS3ObjectHandler) ProcessS3Object() error { +func (m *MockS3ObjectHandler) ProcessS3Object(log *logp.Logger, eventCallback func(beat.Event)) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ProcessS3Object") + ret := m.ctrl.Call(m, "ProcessS3Object", log, eventCallback) ret0, _ := ret[0].(error) return ret0 } // ProcessS3Object indicates an expected call of ProcessS3Object. -func (mr *MockS3ObjectHandlerMockRecorder) ProcessS3Object() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ProcessS3Object", reflect.TypeOf((*MockS3ObjectHandler)(nil).ProcessS3Object)) -} - -// Wait mocks base method. -func (m *MockS3ObjectHandler) Wait() { - m.ctrl.T.Helper() - m.ctrl.Call(m, "Wait") -} - -// Wait indicates an expected call of Wait. 
-func (mr *MockS3ObjectHandlerMockRecorder) Wait() *gomock.Call { +func (mr *MockS3ObjectHandlerMockRecorder) ProcessS3Object(log, eventCallback interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Wait", reflect.TypeOf((*MockS3ObjectHandler)(nil).Wait)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ProcessS3Object", reflect.TypeOf((*MockS3ObjectHandler)(nil).ProcessS3Object), log, eventCallback) } From c2131e1f5752ff91b6f5458467b8de51f7edf1be Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 3 Oct 2024 16:31:19 -0400 Subject: [PATCH 16/30] update mock calls --- x-pack/filebeat/input/awss3/sqs_s3_event_test.go | 14 +++++++------- x-pack/filebeat/input/awss3/sqs_test.go | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event_test.go b/x-pack/filebeat/input/awss3/sqs_s3_event_test.go index 2336f93b065..dafadf68f42 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event_test.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event_test.go @@ -45,7 +45,7 @@ func TestSQSS3EventProcessor(t *testing.T) { gomock.InOrder( mockBeatPipeline.EXPECT().ConnectWith(gomock.Any()).Return(mockClient, nil), - mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), + mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any()).Return(nil), mockClient.EXPECT().Close(), mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), ) @@ -118,11 +118,11 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI.EXPECT().ChangeMessageVisibility(gomock.Any(), gomock.Eq(&msg), gomock.Eq(visibilityTimeout)).AnyTimes().Return(nil) gomock.InOrder( - mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()). + mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any()). 
Do(func(ctx context.Context, _ *logp.Logger, _ beat.Client, _ *awscommon.EventACKTracker, _ s3EventV2) { require.NoError(t, timed.Wait(ctx, 5*visibilityTimeout)) }).Return(mockS3Handler), - mockS3Handler.EXPECT().ProcessS3Object().Return(nil), + mockS3Handler.EXPECT().ProcessS3Object(gomock.Any(), gomock.Any()).Return(nil), mockClient.EXPECT().Close(), mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), mockS3Handler.EXPECT().FinalizeS3Object().Return(nil), @@ -144,8 +144,8 @@ func TestSQSS3EventProcessor(t *testing.T) { mockS3Handler := NewMockS3ObjectHandler(ctrl) gomock.InOrder( - mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(mockS3Handler), - mockS3Handler.EXPECT().ProcessS3Object().Return(errors.New("fake connectivity problem")), + mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any()).Return(mockS3Handler), + mockS3Handler.EXPECT().ProcessS3Object(gomock.Any(), gomock.Any()).Return(errors.New("fake connectivity problem")), ) p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) @@ -170,8 +170,8 @@ func TestSQSS3EventProcessor(t *testing.T) { } gomock.InOrder( - mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(mockS3Handler), - mockS3Handler.EXPECT().ProcessS3Object().Return(errors.New("fake connectivity problem")), + mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any()).Return(mockS3Handler), + mockS3Handler.EXPECT().ProcessS3Object(gomock.Any(), gomock.Any()).Return(errors.New("fake connectivity problem")), mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), ) diff --git a/x-pack/filebeat/input/awss3/sqs_test.go b/x-pack/filebeat/input/awss3/sqs_test.go index fff17ebc1a6..9691763903d 100644 --- a/x-pack/filebeat/input/awss3/sqs_test.go +++ b/x-pack/filebeat/input/awss3/sqs_test.go @@ -74,7 +74,7 @@ func TestSQSReceiver(t *testing.T) { // Expect the one message returned to have been processed. mockMsgHandler.EXPECT(). - ProcessSQS(gomock.Any(), gomock.Eq(&msg)). + ProcessSQS(gomock.Any(), gomock.Eq(&msg), gomock.Any()). Times(1). Return(nil) From 1a07561d4e3b34aaead1c5767c750fe12cc3ca90 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 3 Oct 2024 16:40:32 -0400 Subject: [PATCH 17/30] clean up dead code --- x-pack/filebeat/input/awss3/s3_input.go | 12 ------------ x-pack/filebeat/input/awss3/s3_objects.go | 1 - 2 files changed, 13 deletions(-) diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index 58a807c4c74..92d7a52a128 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -38,12 +38,6 @@ type s3PollerInput struct { states *states } -// s3FetchTask contains metadata for one S3 object that a worker should fetch. 
-type s3FetchTask struct { - s3ObjectHandler s3ObjectHandler - objectState state -} - func newS3PollerInput( config config, awsConfig awssdk.Config, @@ -246,9 +240,3 @@ func (in *s3PollerInput) s3EventForState(state state) s3EventV2 { event.S3.Object.Key = state.Key return event } - -func (in *s3PollerInput) createS3ObjectProcessor(ctx context.Context, state state) s3ObjectHandler { - event := in.s3EventForState(state) - - return in.s3ObjectHandler.Create(ctx, event) -} diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 9b0e76d21f7..222d6301b59 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -45,7 +45,6 @@ type s3ObjectProcessor struct { s3Obj s3EventV2 // S3 object information. s3ObjHash string s3RequestURL string - eventCount int64 s3Metadata map[string]interface{} // S3 object metadata. } From 597707223826d073b31f24c66eb487bd7efba2f4 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 18:41:01 -0400 Subject: [PATCH 18/30] finish config/doc updates, address review comments --- go.mod | 1 + go.sum | 2 + .../filebeat.inputs.reference.xpack.yml.tmpl | 4 +- .../docs/inputs/input-aws-s3.asciidoc | 14 +- x-pack/filebeat/filebeat.reference.yml | 316 +++++++++--------- x-pack/filebeat/input/awss3/acks.go | 28 +- x-pack/filebeat/input/awss3/config.go | 58 ++-- x-pack/filebeat/input/awss3/config_test.go | 34 +- .../input/awss3/input_benchmark_test.go | 8 +- .../input/awss3/input_integration_test.go | 2 +- x-pack/filebeat/input/awss3/s3_input.go | 2 +- x-pack/filebeat/input/awss3/s3_objects.go | 3 +- x-pack/filebeat/input/awss3/sqs_input.go | 8 +- x-pack/filebeat/input/awss3/sqs_test.go | 6 +- .../module/aws/cloudtrail/config/aws-s3.yml | 4 - .../module/aws/cloudtrail/manifest.yml | 1 - .../module/aws/s3access/config/aws-s3.yml | 4 - .../filebeat/module/aws/s3access/manifest.yml | 1 - .../module/aws/vpcflow/config/input.yml | 4 - .../filebeat/module/aws/vpcflow/manifest.yml | 1 - 20 files changed, 230 insertions(+), 271 deletions(-) diff --git a/go.mod b/go.mod index 0cc96844c85..816991166ab 100644 --- a/go.mod +++ b/go.mod @@ -219,6 +219,7 @@ require ( github.com/shirou/gopsutil/v3 v3.22.10 github.com/tklauser/go-sysconf v0.3.10 github.com/xdg-go/scram v1.1.2 + github.com/zyedidia/generic v1.2.1 go.elastic.co/apm/module/apmelasticsearch/v2 v2.6.0 go.elastic.co/apm/module/apmhttp/v2 v2.6.0 go.elastic.co/apm/v2 v2.6.0 diff --git a/go.sum b/go.sum index 0872d531cd5..33669214caf 100644 --- a/go.sum +++ b/go.sum @@ -923,6 +923,8 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +github.com/zyedidia/generic v1.2.1 h1:Zv5KS/N2m0XZZiuLS82qheRG4X1o5gsWreGb0hR7XDc= +github.com/zyedidia/generic v1.2.1/go.mod h1:ly2RBz4mnz1yeuVbQA/VFwGjK3mnHGRj1JuoG336Bis= go.einride.tech/aip v0.67.1 h1:d/4TW92OxXBngkSOwWS2CH5rez869KpKMaN44mdxkFI= go.einride.tech/aip v0.67.1/go.mod h1:ZGX4/zKw8dcgzdLsrvpOOGxfxI2QSk12SlP7d6c0/XI= go.elastic.co/apm/module/apmelasticsearch/v2 v2.6.0 h1:ukMcwyMaDXsS1dRK2qRYXT2AsfwaUy74TOOYCqkWJow= diff --git a/x-pack/filebeat/_meta/config/filebeat.inputs.reference.xpack.yml.tmpl b/x-pack/filebeat/_meta/config/filebeat.inputs.reference.xpack.yml.tmpl index 8215bc3c389..198e8dafb22 100644 --- 
a/x-pack/filebeat/_meta/config/filebeat.inputs.reference.xpack.yml.tmpl +++ b/x-pack/filebeat/_meta/config/filebeat.inputs.reference.xpack.yml.tmpl @@ -79,8 +79,8 @@ # SQS queue URL to receive messages from (required). #queue_url: "https://sqs.us-east-1.amazonaws.com/1234/test-aws-s3-logs-queue" - # Maximum number of SQS messages that can be inflight at any time. - #max_number_of_messages: 5 + # Number of workers on S3 bucket or SQS queue + #number_of_workers: 5 # Maximum duration of an AWS API call (excluding S3 GetObject calls). #api_timeout: 120s diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 43d4b102f63..aa8ecbf7259 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -307,18 +307,6 @@ The maximum number of bytes that a single log message can have. All bytes after multiline log messages, which can get large. This only applies to non-JSON logs. The default is `10 MiB`. -[float] -==== `max_number_of_messages` - -The maximum number of SQS messages that can be inflight at any time. Defaults -to 5. Setting this parameter too high can overload Elastic Agent and cause -ingest failures in situations where the SQS messages contain many S3 objects -or the S3 objects themselves contain large numbers of messages. -We recommend to keep the default value 5 and use the `Balanced` or `Optimized for -Throughput` setting in the -{fleet-guide}/es-output-settings.html#es-output-settings-performance-tuning-settings[preset] -options to tune your Elastic Agent performance. - [id="input-{type}-parsers"] [float] ==== `parsers` @@ -504,7 +492,7 @@ Prefix to apply for the list request to the S3 bucket. Default empty. [float] ==== `number_of_workers` -Number of workers that will process the S3 objects listed. (Required when `bucket_arn` is set). +Number of workers that will process the S3 or SQS objects listed. Required when `bucket_arn` is set, otherwise (in the SQS case) defaults to 5. [float] diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 09a540aa21e..ed565d6747d 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -111,7 +111,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Process CloudTrail logs @@ -160,9 +160,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -184,7 +181,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -221,9 +218,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. 
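As a point of reference for the documentation change above, a minimal sketch of an SQS-mode configuration once `number_of_workers` takes over from `max_number_of_messages`; the queue URL and worker count are placeholder values, and the snippet only shows that the option unpacks as expected.

package main

import (
	"fmt"

	conf "github.com/elastic/elastic-agent-libs/config"
)

func main() {
	// Placeholder SQS-mode settings; number_of_workers now bounds concurrency
	// in both the SQS and S3-polling modes of the aws-s3 input.
	cfg := conf.MustNewConfigFrom(`
queue_url: https://sqs.us-east-1.amazonaws.com/1234/example-queue
number_of_workers: 5
visibility_timeout: 300s
`)
	settings := struct {
		QueueURL        string `config:"queue_url"`
		NumberOfWorkers int    `config:"number_of_workers"`
	}{}
	if err := cfg.Unpack(&settings); err != nil {
		panic(err)
	}
	fmt.Println(settings.QueueURL, settings.NumberOfWorkers)
}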
- #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -245,7 +239,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -282,9 +276,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -306,7 +297,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -343,9 +334,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -367,7 +355,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -404,9 +392,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -428,7 +413,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -465,9 +450,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -1500,147 +1482,147 @@ filebeat.modules: #var.password: #------------------------------ Salesforce Module ------------------------------ -# Configuration file for Salesforce module in Filebeat - -# Common Configurations: -# - enabled: Set to true to enable ingestion of Salesforce module fileset -# - initial_interval: Initial interval for log collection. This setting determines the time period for which the logs will be initially collected when the ingestion process starts, i.e. 
1d/h/m/s -# - api_version: API version for Salesforce, version should be greater than 46.0 - -# Authentication Configurations: -# User-Password Authentication: -# - enabled: Set to true to enable user-password authentication -# - client.id: Client ID for user-password authentication -# - client.secret: Client secret for user-password authentication -# - token_url: Token URL for user-password authentication -# - username: Username for user-password authentication -# - password: Password for user-password authentication - -# JWT Authentication: -# - enabled: Set to true to enable JWT authentication -# - client.id: Client ID for JWT authentication -# - client.username: Username for JWT authentication -# - client.key_path: Path to client key for JWT authentication -# - url: Audience URL for JWT authentication - -# Event Monitoring: -# - real_time: Set to true to enable real-time logging using object type data collection -# - real_time_interval: Interval for real-time logging - -# Event Log File: -# - event_log_file: Set to true to enable event log file type data collection -# - elf_interval: Interval for event log file -# - log_file_interval: Interval type for log file collection, either Hourly or Daily - -- module: salesforce - - apex: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.event_log_file: true - var.elf_interval: 1h - var.log_file_interval: "Hourly" - - login: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "client-secret" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.event_log_file: true - var.elf_interval: 1h - var.log_file_interval: "Hourly" - - var.real_time: true - var.real_time_interval: 5m - - logout: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "client-secret" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.event_log_file: true - var.elf_interval: 1h - var.log_file_interval: "Hourly" - - var.real_time: true - var.real_time_interval: 5m - - setupaudittrail: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "client-secret" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.real_time: true +# Configuration file for Salesforce module in Filebeat + +# Common Configurations: +# - enabled: Set to true to enable ingestion of Salesforce module fileset +# - initial_interval: Initial interval for log collection. 
This setting determines the time period for which the logs will be initially collected when the ingestion process starts, i.e. 1d/h/m/s +# - api_version: API version for Salesforce, version should be greater than 46.0 + +# Authentication Configurations: +# User-Password Authentication: +# - enabled: Set to true to enable user-password authentication +# - client.id: Client ID for user-password authentication +# - client.secret: Client secret for user-password authentication +# - token_url: Token URL for user-password authentication +# - username: Username for user-password authentication +# - password: Password for user-password authentication + +# JWT Authentication: +# - enabled: Set to true to enable JWT authentication +# - client.id: Client ID for JWT authentication +# - client.username: Username for JWT authentication +# - client.key_path: Path to client key for JWT authentication +# - url: Audience URL for JWT authentication + +# Event Monitoring: +# - real_time: Set to true to enable real-time logging using object type data collection +# - real_time_interval: Interval for real-time logging + +# Event Log File: +# - event_log_file: Set to true to enable event log file type data collection +# - elf_interval: Interval for event log file +# - log_file_interval: Interval type for log file collection, either Hourly or Daily + +- module: salesforce + + apex: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.event_log_file: true + var.elf_interval: 1h + var.log_file_interval: "Hourly" + + login: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "client-secret" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.event_log_file: true + var.elf_interval: 1h + var.log_file_interval: "Hourly" + + var.real_time: true + var.real_time_interval: 5m + + logout: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "client-secret" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.event_log_file: true + var.elf_interval: 1h + var.log_file_interval: "Hourly" + + var.real_time: true + var.real_time_interval: 5m + + setupaudittrail: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "client-secret" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.real_time: true var.real_time_interval: 5m #----------------------------- Google Santa Module ----------------------------- - 
module: santa @@ -2985,8 +2967,8 @@ filebeat.inputs: # SQS queue URL to receive messages from (required). #queue_url: "https://sqs.us-east-1.amazonaws.com/1234/test-aws-s3-logs-queue" - # Maximum number of SQS messages that can be inflight at any time. - #max_number_of_messages: 5 + # Number of workers on S3 bucket or SQS queue + #number_of_workers: 5 # Maximum duration of an AWS API call (excluding S3 GetObject calls). #api_timeout: 120s diff --git a/x-pack/filebeat/input/awss3/acks.go b/x-pack/filebeat/input/awss3/acks.go index bf6b16daac4..f4817f06db8 100644 --- a/x-pack/filebeat/input/awss3/acks.go +++ b/x-pack/filebeat/input/awss3/acks.go @@ -5,13 +5,14 @@ package awss3 import ( + "github.com/zyedidia/generic/queue" + "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common/acker" - "github.com/elastic/beats/v7/libbeat/common/fifo" ) type awsACKHandler struct { - pending fifo.FIFO[pendingACK] + pending *queue.Queue[pendingACK] ackedCount int pendingChan chan pendingACK @@ -25,6 +26,23 @@ type pendingACK struct { func newAWSACKHandler() *awsACKHandler { handler := &awsACKHandler{ + pending: queue.New[pendingACK](), + + // Channel buffer sizes are somewhat arbitrary: synchronous channels + // would be safe, but buffers slightly reduce scheduler overhead since + // the ack loop goroutine doesn't need to wake up as often. + // + // pendingChan receives one message each time an S3/SQS worker goroutine + // finishes processing an object. If it is full, workers will not be able + // to advance to the next object until the ack loop wakes up. + // + // ackChan receives approximately one message every time an acknowledged + // batch of events contains at least one event from this input. (Sometimes + // fewer if messages can be coalesced.) If it is full, acknowledgement + // notifications for inputs/queue will stall until the ack loop wakes up. + // (This is a much worse consequence than pendingChan, but ackChan also + // receives fewer messages than pendingChan by a factor of ~thousands, + // so in practice it's still low-impact.) pendingChan: make(chan pendingACK, 10), ackChan: make(chan int, 10), } @@ -59,7 +77,7 @@ func (ah *awsACKHandler) run() { select { case result, ok := <-ah.pendingChan: if ok { - ah.pending.Add(result) + ah.pending.Enqueue(result) } else { // Channel is closed, reset so we don't receive any more values ah.pendingChan = nil @@ -69,8 +87,8 @@ func (ah *awsACKHandler) run() { } // Finalize any objects that are now completed - for !ah.pending.Empty() && ah.ackedCount >= ah.pending.First().eventCount { - result := ah.pending.ConsumeFirst() + for !ah.pending.Empty() && ah.ackedCount >= ah.pending.Peek().eventCount { + result := ah.pending.Dequeue() ah.ackedCount -= result.eventCount // Run finalization asynchronously so we don't block the SQS worker // or the queue by ignoring the ack handler's input channels. Ordering diff --git a/x-pack/filebeat/input/awss3/config.go b/x-pack/filebeat/input/awss3/config.go index b85c3f3871c..44664810e85 100644 --- a/x-pack/filebeat/input/awss3/config.go +++ b/x-pack/filebeat/input/awss3/config.go @@ -24,37 +24,36 @@ import ( ) type config struct { - APITimeout time.Duration `config:"api_timeout"` - VisibilityTimeout time.Duration `config:"visibility_timeout"` - SQSWaitTime time.Duration `config:"sqs.wait_time"` // The max duration for which the SQS ReceiveMessage call waits for a message to arrive in the queue before returning. 
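The ordering rule implemented by the ack handler above is easier to see without the channel plumbing. A self-contained sketch with toy types and names (not the input's actual ones): pending objects are finalized strictly oldest-first, once the running total of acknowledged events covers the oldest object's event count.

package main

import (
	"fmt"

	"github.com/zyedidia/generic/queue"
)

type pendingObject struct {
	name       string
	eventCount int
	finalize   func()
}

func main() {
	pending := queue.New[pendingObject]()
	ackedCount := 0

	add := func(name string, events int) {
		pending.Enqueue(pendingObject{
			name:       name,
			eventCount: events,
			finalize:   func() { fmt.Println("finalized", name) },
		})
	}
	// onACK mirrors the drain step: consume completed objects from the front
	// of the queue while the acknowledged-event count covers them.
	onACK := func(n int) {
		ackedCount += n
		for !pending.Empty() && ackedCount >= pending.Peek().eventCount {
			obj := pending.Dequeue()
			ackedCount -= obj.eventCount
			obj.finalize()
		}
	}

	add("objA", 2)
	add("objB", 3)
	onACK(1) // not enough yet; nothing is finalized
	onACK(4) // finalizes objA, then objB, in order
}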
- SQSMaxReceiveCount int `config:"sqs.max_receive_count"` // The max number of times a message should be received (retried) before deleting it. - SQSScript *scriptConfig `config:"sqs.notification_parsing_script"` - MaxNumberOfMessages int `config:"max_number_of_messages"` - QueueURL string `config:"queue_url"` - RegionName string `config:"region"` - BucketARN string `config:"bucket_arn"` - NonAWSBucketName string `config:"non_aws_bucket_name"` - BucketListInterval time.Duration `config:"bucket_list_interval"` - BucketListPrefix string `config:"bucket_list_prefix"` - NumberOfWorkers int `config:"number_of_workers"` - AWSConfig awscommon.ConfigAWS `config:",inline"` - FileSelectors []fileSelectorConfig `config:"file_selectors"` - ReaderConfig readerConfig `config:",inline"` // Reader options to apply when no file_selectors are used. - PathStyle bool `config:"path_style"` - ProviderOverride string `config:"provider"` - BackupConfig backupConfig `config:",inline"` + APITimeout time.Duration `config:"api_timeout"` + VisibilityTimeout time.Duration `config:"visibility_timeout"` + SQSWaitTime time.Duration `config:"sqs.wait_time"` // The max duration for which the SQS ReceiveMessage call waits for a message to arrive in the queue before returning. + SQSMaxReceiveCount int `config:"sqs.max_receive_count"` // The max number of times a message should be received (retried) before deleting it. + SQSScript *scriptConfig `config:"sqs.notification_parsing_script"` + QueueURL string `config:"queue_url"` + RegionName string `config:"region"` + BucketARN string `config:"bucket_arn"` + NonAWSBucketName string `config:"non_aws_bucket_name"` + BucketListInterval time.Duration `config:"bucket_list_interval"` + BucketListPrefix string `config:"bucket_list_prefix"` + NumberOfWorkers int `config:"number_of_workers"` + AWSConfig awscommon.ConfigAWS `config:",inline"` + FileSelectors []fileSelectorConfig `config:"file_selectors"` + ReaderConfig readerConfig `config:",inline"` // Reader options to apply when no file_selectors are used. 
+ PathStyle bool `config:"path_style"` + ProviderOverride string `config:"provider"` + BackupConfig backupConfig `config:",inline"` } func defaultConfig() config { c := config{ - APITimeout: 120 * time.Second, - VisibilityTimeout: 300 * time.Second, - BucketListInterval: 120 * time.Second, - BucketListPrefix: "", - SQSWaitTime: 20 * time.Second, - SQSMaxReceiveCount: 5, - MaxNumberOfMessages: 5, - PathStyle: false, + APITimeout: 120 * time.Second, + VisibilityTimeout: 300 * time.Second, + BucketListInterval: 120 * time.Second, + BucketListPrefix: "", + SQSWaitTime: 20 * time.Second, + SQSMaxReceiveCount: 5, + NumberOfWorkers: 5, + PathStyle: false, } c.ReaderConfig.InitDefaults() return c @@ -93,11 +92,6 @@ func (c *config) Validate() error { "less than or equal to 20s", c.SQSWaitTime) } - if c.QueueURL != "" && c.MaxNumberOfMessages <= 0 { - return fmt.Errorf("max_number_of_messages <%v> must be greater than 0", - c.MaxNumberOfMessages) - } - if c.QueueURL != "" && c.APITimeout < c.SQSWaitTime { return fmt.Errorf("api_timeout <%v> must be greater than the sqs.wait_time <%v", c.APITimeout, c.SQSWaitTime) diff --git a/x-pack/filebeat/input/awss3/config_test.go b/x-pack/filebeat/input/awss3/config_test.go index 651f8099d91..907a5854b28 100644 --- a/x-pack/filebeat/input/awss3/config_test.go +++ b/x-pack/filebeat/input/awss3/config_test.go @@ -30,17 +30,17 @@ func TestConfig(t *testing.T) { parserConf := parser.Config{} require.NoError(t, parserConf.Unpack(conf.MustNewConfigFrom(""))) return config{ - QueueURL: quequeURL, - BucketARN: s3Bucket, - NonAWSBucketName: nonAWSS3Bucket, - APITimeout: 120 * time.Second, - VisibilityTimeout: 300 * time.Second, - SQSMaxReceiveCount: 5, - SQSWaitTime: 20 * time.Second, - BucketListInterval: 120 * time.Second, - BucketListPrefix: "", - PathStyle: false, - MaxNumberOfMessages: 5, + QueueURL: quequeURL, + BucketARN: s3Bucket, + NonAWSBucketName: nonAWSS3Bucket, + APITimeout: 120 * time.Second, + VisibilityTimeout: 300 * time.Second, + SQSMaxReceiveCount: 5, + SQSWaitTime: 20 * time.Second, + BucketListInterval: 120 * time.Second, + BucketListPrefix: "", + PathStyle: false, + NumberOfWorkers: 5, ReaderConfig: readerConfig{ BufferSize: 16 * humanize.KiByte, MaxBytes: 10 * humanize.MiByte, @@ -304,18 +304,6 @@ func TestConfig(t *testing.T) { expectedErr: "number_of_workers <0> must be greater than 0", expectedCfg: nil, }, - { - name: "error on max_number_of_messages == 0", - queueURL: queueURL, - s3Bucket: "", - nonAWSS3Bucket: "", - config: mapstr.M{ - "queue_url": queueURL, - "max_number_of_messages": "0", - }, - expectedErr: "max_number_of_messages <0> must be greater than 0", - expectedCfg: nil, - }, { name: "error on buffer_size == 0 ", queueURL: queueURL, diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index db8054778d4..ce3708fb16b 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -208,15 +208,15 @@ file_selectors: return inputConfig } -func benchmarkInputSQS(t *testing.T, maxMessagesInflight int) testing.BenchmarkResult { +func benchmarkInputSQS(t *testing.T, workerCount int) testing.BenchmarkResult { return testing.Benchmark(func(b *testing.B) { var err error config := makeBenchmarkConfig(t) - config.MaxNumberOfMessages = maxMessagesInflight + config.NumberOfWorkers = workerCount sqsReader := newSQSReaderInput(config, aws.Config{}) sqsReader.log = log.Named("sqs") - sqsReader.metrics = 
newInputMetrics("test_id", monitoring.NewRegistry(), maxMessagesInflight) + sqsReader.metrics = newInputMetrics("test_id", monitoring.NewRegistry(), workerCount) sqsReader.sqs, err = newConstantSQS() require.NoError(t, err) sqsReader.s3 = newConstantS3(t) @@ -239,7 +239,7 @@ func benchmarkInputSQS(t *testing.T, maxMessagesInflight int) testing.BenchmarkR b.StopTimer() elapsed := time.Since(start) - b.ReportMetric(float64(maxMessagesInflight), "max_messages_inflight") + b.ReportMetric(float64(workerCount), "number_of_workers") b.ReportMetric(elapsed.Seconds(), "sec") b.ReportMetric(float64(sqsReader.metrics.s3EventsCreatedTotal.Get()), "events") diff --git a/x-pack/filebeat/input/awss3/input_integration_test.go b/x-pack/filebeat/input/awss3/input_integration_test.go index 88d81a9f0c8..9303c5c7259 100644 --- a/x-pack/filebeat/input/awss3/input_integration_test.go +++ b/x-pack/filebeat/input/awss3/input_integration_test.go @@ -112,7 +112,7 @@ file_selectors: func makeTestConfigSQS(queueURL string) *conf.C { return conf.MustNewConfigFrom(fmt.Sprintf(`--- queue_url: %s -max_number_of_messages: 1 +number_of_workers: 1 visibility_timeout: 30s region: us-east-1 file_selectors: diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index 92d7a52a128..7d72f3753a9 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -78,7 +78,7 @@ func (in *s3PollerInput) Run( return fmt.Errorf("failed to create S3 API: %w", err) } - in.metrics = newInputMetrics(inputContext.ID, nil, in.config.MaxNumberOfMessages) + in.metrics = newInputMetrics(inputContext.ID, nil, in.config.NumberOfWorkers) defer in.metrics.Close() in.s3ObjectHandler = newS3ObjectProcessorFactory( diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 3b76eb8f6c8..5d73d8980fd 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -177,7 +177,8 @@ func (p *s3ObjectProcessor) ProcessS3Object(log *logp.Logger, eventCallback func return err } evt := p.createEvent(string(data), evtOffset) - p.publish(p.acker, &evt) + + p.eventCallback(evt) } case decoder: diff --git a/x-pack/filebeat/input/awss3/sqs_input.go b/x-pack/filebeat/input/awss3/sqs_input.go index 85ead4d68f0..a4308af45a8 100644 --- a/x-pack/filebeat/input/awss3/sqs_input.go +++ b/x-pack/filebeat/input/awss3/sqs_input.go @@ -49,7 +49,7 @@ func newSQSReaderInput(config config, awsConfig awssdk.Config) *sqsReaderInput { return &sqsReaderInput{ config: config, awsConfig: awsConfig, - workRequestChan: make(chan struct{}, config.MaxNumberOfMessages), + workRequestChan: make(chan struct{}, config.NumberOfWorkers), workResponseChan: make(chan types.Message), } } @@ -109,7 +109,7 @@ func (in *sqsReaderInput) setup( in.s3 = newAWSs3API(s3.NewFromConfig(in.awsConfig, in.config.s3ConfigModifier)) - in.metrics = newInputMetrics(inputContext.ID, nil, in.config.MaxNumberOfMessages) + in.metrics = newInputMetrics(inputContext.ID, nil, in.config.NumberOfWorkers) var err error in.msgHandler, err = in.createEventProcessor() @@ -236,7 +236,7 @@ func (w *sqsWorker) processMessage(ctx context.Context, msg types.Message) { func (in *sqsReaderInput) startWorkers(ctx context.Context) { // Start the worker goroutines that will fetch messages via workRequestChan // and workResponseChan until the input shuts down. 
- for i := 0; i < in.config.MaxNumberOfMessages; i++ { + for i := 0; i < in.config.NumberOfWorkers; i++ { in.workerWg.Add(1) go func() { defer in.workerWg.Done() @@ -258,7 +258,7 @@ func (in *sqsReaderInput) logConfigSummary() { log.Warnf("configured region disagrees with queue_url region (%q != %q): using %q", in.awsConfig.Region, in.detectedRegion, in.awsConfig.Region) } log.Infof("AWS SQS visibility_timeout is set to %v.", in.config.VisibilityTimeout) - log.Infof("AWS SQS max_number_of_messages is set to %v.", in.config.MaxNumberOfMessages) + log.Infof("AWS SQS number_of_workers is set to %v.", in.config.NumberOfWorkers) if in.config.BackupConfig.GetBucketName() != "" { log.Warnf("You have the backup_to_bucket functionality activated with SQS. Please make sure to set appropriate destination buckets " + diff --git a/x-pack/filebeat/input/awss3/sqs_test.go b/x-pack/filebeat/input/awss3/sqs_test.go index 9691763903d..eec2b67ec2c 100644 --- a/x-pack/filebeat/input/awss3/sqs_test.go +++ b/x-pack/filebeat/input/awss3/sqs_test.go @@ -33,7 +33,7 @@ func TestSQSReceiver(t *testing.T) { err := logp.TestingSetup() require.NoError(t, err) - const maxMessages = 5 + const workerCount = 5 t.Run("ReceiveMessage success", func(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) @@ -79,7 +79,7 @@ func TestSQSReceiver(t *testing.T) { Return(nil) // Execute sqsReader and verify calls/state. - sqsReader := newSQSReaderInput(config{MaxNumberOfMessages: maxMessages}, aws.Config{}) + sqsReader := newSQSReaderInput(config{NumberOfWorkers: workerCount}, aws.Config{}) sqsReader.log = logp.NewLogger(inputName) sqsReader.sqs = mockSQS sqsReader.msgHandler = mockMsgHandler @@ -120,7 +120,7 @@ func TestSQSReceiver(t *testing.T) { }).AnyTimes() // Execute SQSReader and verify calls/state. 
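The workRequestChan/workResponseChan handshake behind the worker start-up above is a small pattern worth spelling out on its own. A compact illustration with toy messages (names and counts are placeholders): each worker announces readiness with a token, and the reader only fetches and hands out work when someone is waiting for it.

package main

import (
	"fmt"
	"sync"
)

func main() {
	const workers = 3
	workRequests := make(chan struct{}, workers) // one slot per worker is enough
	workResponses := make(chan string)

	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for {
				workRequests <- struct{}{} // signal readiness for one message
				msg, ok := <-workResponses
				if !ok {
					return // reader shut down
				}
				fmt.Printf("worker %d handled %s\n", id, msg)
			}
		}(i)
	}

	// Reader side: only hand out a message once a worker has asked for one,
	// so in-flight work never exceeds the configured worker count.
	for i := 0; i < 5; i++ {
		<-workRequests
		workResponses <- fmt.Sprintf("message-%d", i)
	}
	close(workResponses)
	wg.Wait()
}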
- sqsReader := newSQSReaderInput(config{MaxNumberOfMessages: maxMessages}, aws.Config{}) + sqsReader := newSQSReaderInput(config{NumberOfWorkers: workerCount}, aws.Config{}) sqsReader.log = logp.NewLogger(inputName) sqsReader.sqs = mockSQS sqsReader.msgHandler = mockMsgHandler diff --git a/x-pack/filebeat/module/aws/cloudtrail/config/aws-s3.yml b/x-pack/filebeat/module/aws/cloudtrail/config/aws-s3.yml index ada3a502fc2..0f395737a05 100644 --- a/x-pack/filebeat/module/aws/cloudtrail/config/aws-s3.yml +++ b/x-pack/filebeat/module/aws/cloudtrail/config/aws-s3.yml @@ -77,10 +77,6 @@ role_arn: {{ .role_arn }} fips_enabled: {{ .fips_enabled }} {{ end }} -{{ if .max_number_of_messages }} -max_number_of_messages: {{ .max_number_of_messages }} -{{ end }} - {{ if .proxy_url }} proxy_url: {{ .proxy_url }} {{ end }} diff --git a/x-pack/filebeat/module/aws/cloudtrail/manifest.yml b/x-pack/filebeat/module/aws/cloudtrail/manifest.yml index f19760eb637..84e6d906037 100644 --- a/x-pack/filebeat/module/aws/cloudtrail/manifest.yml +++ b/x-pack/filebeat/module/aws/cloudtrail/manifest.yml @@ -28,7 +28,6 @@ var: default: true - name: fips_enabled - name: proxy_url - - name: max_number_of_messages - name: ssl ingest_pipeline: ingest/pipeline.yml diff --git a/x-pack/filebeat/module/aws/s3access/config/aws-s3.yml b/x-pack/filebeat/module/aws/s3access/config/aws-s3.yml index 8ce1970290d..4c026080925 100644 --- a/x-pack/filebeat/module/aws/s3access/config/aws-s3.yml +++ b/x-pack/filebeat/module/aws/s3access/config/aws-s3.yml @@ -62,10 +62,6 @@ role_arn: {{ .role_arn }} fips_enabled: {{ .fips_enabled }} {{ end }} -{{ if .max_number_of_messages }} -max_number_of_messages: {{ .max_number_of_messages }} -{{ end }} - {{ if .proxy_url }} proxy_url: {{ .proxy_url }} {{ end }} diff --git a/x-pack/filebeat/module/aws/s3access/manifest.yml b/x-pack/filebeat/module/aws/s3access/manifest.yml index e52ba673757..dc17d116928 100644 --- a/x-pack/filebeat/module/aws/s3access/manifest.yml +++ b/x-pack/filebeat/module/aws/s3access/manifest.yml @@ -22,7 +22,6 @@ var: default: [forwarded] - name: fips_enabled - name: proxy_url - - name: max_number_of_messages - name: ssl ingest_pipeline: ingest/pipeline.yml diff --git a/x-pack/filebeat/module/aws/vpcflow/config/input.yml b/x-pack/filebeat/module/aws/vpcflow/config/input.yml index ecb1842be7a..34feb9880b6 100644 --- a/x-pack/filebeat/module/aws/vpcflow/config/input.yml +++ b/x-pack/filebeat/module/aws/vpcflow/config/input.yml @@ -64,10 +64,6 @@ role_arn: {{ .role_arn }} fips_enabled: {{ .fips_enabled }} {{ end }} -{{ if .max_number_of_messages }} -max_number_of_messages: {{ .max_number_of_messages }} -{{ end }} - {{ if .proxy_url }} proxy_url: {{ .proxy_url }} {{ end }} diff --git a/x-pack/filebeat/module/aws/vpcflow/manifest.yml b/x-pack/filebeat/module/aws/vpcflow/manifest.yml index de772408a86..0787eb019b7 100644 --- a/x-pack/filebeat/module/aws/vpcflow/manifest.yml +++ b/x-pack/filebeat/module/aws/vpcflow/manifest.yml @@ -22,7 +22,6 @@ var: default: [forwarded, preserve_original_event] - name: fips_enabled - name: proxy_url - - name: max_number_of_messages - name: ssl - name: format default: From 6fe1183b68e68dc329a61ed3c33782c0dbc600ab Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 18:41:54 -0400 Subject: [PATCH 19/30] remove helper that was replaced with shared lib --- libbeat/common/fifo/fifo.go | 73 ------------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 libbeat/common/fifo/fifo.go diff --git a/libbeat/common/fifo/fifo.go 
b/libbeat/common/fifo/fifo.go deleted file mode 100644 index ceedf7412dc..00000000000 --- a/libbeat/common/fifo/fifo.go +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package fifo - -// FIFO is a minimal first-in first-out queue based on a singly linked list. -type FIFO[T any] struct { - first *node[T] - last *node[T] -} - -type node[T any] struct { - next *node[T] - value T -} - -func (f *FIFO[T]) Add(value T) { - newNode := &node[T]{value: value} - if f.first == nil { - f.first = newNode - } else { - f.last.next = newNode - } - f.last = newNode -} - -func (f *FIFO[T]) Empty() bool { - return f.first == nil -} - -// Return the first value (if present) without removing it from the queue. -// If the queue is empty a default value is returned, to detect this case -// use f.Empty(). -func (f *FIFO[T]) First() T { - if f.first == nil { - var none T - return none - } - return f.first.value -} - -// Return the first value (if present) and remove it from the queue. -// If the queue is empty a default value is returned, to detect this case -// use f.Empty(). -func (f *FIFO[T]) ConsumeFirst() T { - result := f.First() - f.Remove() - return result -} - -// Remove the first entry in the queue. Does nothing if the FIFO is empty. -func (f *FIFO[T]) Remove() { - if f.first != nil { - f.first = f.first.next - if f.first == nil { - f.last = nil - } - } -} From ba6ecfe50c39c11869f816d0f8faddefef776e3c Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 18:52:36 -0400 Subject: [PATCH 20/30] make check --- x-pack/filebeat/filebeat.reference.yml | 312 +++++++++++++------------ 1 file changed, 165 insertions(+), 147 deletions(-) diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index ed565d6747d..0229a1a2fd0 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -111,7 +111,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket or SQS queue + # Number of workers on S3 bucket #var.number_of_workers: 5 # Process CloudTrail logs @@ -160,6 +160,9 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false + # The maximum number of messages to return from SQS. Valid values: 1 to 10. 
+ #var.max_number_of_messages: 5 + # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -181,7 +184,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket or SQS queue + # Number of workers on S3 bucket #var.number_of_workers: 5 # Filename of AWS credential file @@ -218,6 +221,9 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false + # The maximum number of messages to return from SQS. Valid values: 1 to 10. + #var.max_number_of_messages: 5 + # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -239,7 +245,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket or SQS queue + # Number of workers on S3 bucket #var.number_of_workers: 5 # Filename of AWS credential file @@ -276,6 +282,9 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false + # The maximum number of messages to return from SQS. Valid values: 1 to 10. + #var.max_number_of_messages: 5 + # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -297,7 +306,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket or SQS queue + # Number of workers on S3 bucket #var.number_of_workers: 5 # Filename of AWS credential file @@ -334,6 +343,9 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false + # The maximum number of messages to return from SQS. Valid values: 1 to 10. + #var.max_number_of_messages: 5 + # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -355,7 +367,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket or SQS queue + # Number of workers on S3 bucket #var.number_of_workers: 5 # Filename of AWS credential file @@ -392,6 +404,9 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false + # The maximum number of messages to return from SQS. Valid values: 1 to 10. + #var.max_number_of_messages: 5 + # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -413,7 +428,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket or SQS queue + # Number of workers on S3 bucket #var.number_of_workers: 5 # Filename of AWS credential file @@ -450,6 +465,9 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false + # The maximum number of messages to return from SQS. Valid values: 1 to 10. + #var.max_number_of_messages: 5 + # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -1482,147 +1500,147 @@ filebeat.modules: #var.password: #------------------------------ Salesforce Module ------------------------------ -# Configuration file for Salesforce module in Filebeat - -# Common Configurations: -# - enabled: Set to true to enable ingestion of Salesforce module fileset -# - initial_interval: Initial interval for log collection. 
This setting determines the time period for which the logs will be initially collected when the ingestion process starts, i.e. 1d/h/m/s -# - api_version: API version for Salesforce, version should be greater than 46.0 - -# Authentication Configurations: -# User-Password Authentication: -# - enabled: Set to true to enable user-password authentication -# - client.id: Client ID for user-password authentication -# - client.secret: Client secret for user-password authentication -# - token_url: Token URL for user-password authentication -# - username: Username for user-password authentication -# - password: Password for user-password authentication - -# JWT Authentication: -# - enabled: Set to true to enable JWT authentication -# - client.id: Client ID for JWT authentication -# - client.username: Username for JWT authentication -# - client.key_path: Path to client key for JWT authentication -# - url: Audience URL for JWT authentication - -# Event Monitoring: -# - real_time: Set to true to enable real-time logging using object type data collection -# - real_time_interval: Interval for real-time logging - -# Event Log File: -# - event_log_file: Set to true to enable event log file type data collection -# - elf_interval: Interval for event log file -# - log_file_interval: Interval type for log file collection, either Hourly or Daily - -- module: salesforce - - apex: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.event_log_file: true - var.elf_interval: 1h - var.log_file_interval: "Hourly" - - login: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "client-secret" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.event_log_file: true - var.elf_interval: 1h - var.log_file_interval: "Hourly" - - var.real_time: true - var.real_time_interval: 5m - - logout: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "client-secret" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.event_log_file: true - var.elf_interval: 1h - var.log_file_interval: "Hourly" - - var.real_time: true - var.real_time_interval: 5m - - setupaudittrail: - enabled: false - var.initial_interval: 1d - var.api_version: 56 - - var.authentication: - user_password_flow: - enabled: true - client.id: "" - client.secret: "client-secret" - token_url: "" - username: "" - password: "" - jwt_bearer_flow: - enabled: false - client.id: "" - client.username: "" - client.key_path: "" - url: "https://login.salesforce.com" - - var.url: "https://instance_id.my.salesforce.com" - - var.real_time: true +# Configuration file for Salesforce module in Filebeat + +# Common Configurations: +# - enabled: Set to true to 
enable ingestion of Salesforce module fileset +# - initial_interval: Initial interval for log collection. This setting determines the time period for which the logs will be initially collected when the ingestion process starts, i.e. 1d/h/m/s +# - api_version: API version for Salesforce, version should be greater than 46.0 + +# Authentication Configurations: +# User-Password Authentication: +# - enabled: Set to true to enable user-password authentication +# - client.id: Client ID for user-password authentication +# - client.secret: Client secret for user-password authentication +# - token_url: Token URL for user-password authentication +# - username: Username for user-password authentication +# - password: Password for user-password authentication + +# JWT Authentication: +# - enabled: Set to true to enable JWT authentication +# - client.id: Client ID for JWT authentication +# - client.username: Username for JWT authentication +# - client.key_path: Path to client key for JWT authentication +# - url: Audience URL for JWT authentication + +# Event Monitoring: +# - real_time: Set to true to enable real-time logging using object type data collection +# - real_time_interval: Interval for real-time logging + +# Event Log File: +# - event_log_file: Set to true to enable event log file type data collection +# - elf_interval: Interval for event log file +# - log_file_interval: Interval type for log file collection, either Hourly or Daily + +- module: salesforce + + apex: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.event_log_file: true + var.elf_interval: 1h + var.log_file_interval: "Hourly" + + login: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "client-secret" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.event_log_file: true + var.elf_interval: 1h + var.log_file_interval: "Hourly" + + var.real_time: true + var.real_time_interval: 5m + + logout: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "client-secret" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.event_log_file: true + var.elf_interval: 1h + var.log_file_interval: "Hourly" + + var.real_time: true + var.real_time_interval: 5m + + setupaudittrail: + enabled: false + var.initial_interval: 1d + var.api_version: 56 + + var.authentication: + user_password_flow: + enabled: true + client.id: "" + client.secret: "client-secret" + token_url: "" + username: "" + password: "" + jwt_bearer_flow: + enabled: false + client.id: "" + client.username: "" + client.key_path: "" + url: "https://login.salesforce.com" + + var.url: "https://instance_id.my.salesforce.com" + + var.real_time: true 
var.real_time_interval: 5m #----------------------------- Google Santa Module ----------------------------- - module: santa From f1732f8e3d50e2fea35210b3597bda1601cc9625 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 18:54:37 -0400 Subject: [PATCH 21/30] re-correct doc yml --- x-pack/filebeat/module/aws/_meta/config.yml | 30 +++++---------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/x-pack/filebeat/module/aws/_meta/config.yml b/x-pack/filebeat/module/aws/_meta/config.yml index e92cb36e7b5..da0377b6e46 100644 --- a/x-pack/filebeat/module/aws/_meta/config.yml +++ b/x-pack/filebeat/module/aws/_meta/config.yml @@ -14,7 +14,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Process CloudTrail logs @@ -63,9 +63,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -87,7 +84,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -124,9 +121,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -148,7 +142,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -185,9 +179,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -209,7 +200,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -246,9 +237,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -270,7 +258,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -307,9 +295,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. 
- #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -331,7 +316,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -368,9 +353,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 From 83ba548f203c1f72fc76ff81a92ba34cf825de3c Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 19:02:37 -0400 Subject: [PATCH 22/30] make check again --- x-pack/filebeat/filebeat.reference.yml | 30 +++++----------------- x-pack/filebeat/modules.d/aws.yml.disabled | 30 +++++----------------- 2 files changed, 12 insertions(+), 48 deletions(-) diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 0229a1a2fd0..beb92b51ef7 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -111,7 +111,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Process CloudTrail logs @@ -160,9 +160,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -184,7 +181,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -221,9 +218,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -245,7 +239,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -282,9 +276,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -306,7 +297,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -343,9 +334,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. 
Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -367,7 +355,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -404,9 +392,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -428,7 +413,7 @@ filebeat.modules: # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -465,9 +450,6 @@ filebeat.modules: # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 diff --git a/x-pack/filebeat/modules.d/aws.yml.disabled b/x-pack/filebeat/modules.d/aws.yml.disabled index c730b8aea07..44d5e768ddc 100644 --- a/x-pack/filebeat/modules.d/aws.yml.disabled +++ b/x-pack/filebeat/modules.d/aws.yml.disabled @@ -17,7 +17,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Process CloudTrail logs @@ -66,9 +66,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -90,7 +87,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -127,9 +124,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -151,7 +145,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -188,9 +182,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. 
- #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -212,7 +203,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -249,9 +240,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -273,7 +261,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -310,9 +298,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 @@ -334,7 +319,7 @@ # Bucket list interval on S3 bucket #var.bucket_list_interval: 300s - # Number of workers on S3 bucket + # Number of workers on S3 bucket or SQS queue #var.number_of_workers: 5 # Filename of AWS credential file @@ -371,9 +356,6 @@ # Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. #var.fips_enabled: false - # The maximum number of messages to return from SQS. Valid values: 1 to 10. - #var.max_number_of_messages: 5 - # URL to proxy AWS API calls #var.proxy_url: http://proxy:3128 From 577759cc6e48ce9b38a2635103ecc6b6c393616e Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 19:05:01 -0400 Subject: [PATCH 23/30] update changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8f1d5dfab85..65aa9983422 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -45,6 +45,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Redis: Added replication role as a field to submitted slowlogs - Added `container.image.name` to `journald` Filebeat input's Docker-specific translated fields. {pull}40450[40450] - Remove deprecated awscloudwatch field from Filebeat. {pull}41089[41089] +- `max_number_of_messages` config for S3 input's SQS mode is now ignored. Instead use `number_of_workers` to scale ingestion rate in both S3 and SQS modes. {pull}40699[40699] *Heartbeat* From a743c08cb9c256cfeba6522ef29d8fe5b0c1a26e Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 14 Oct 2024 19:29:17 -0400 Subject: [PATCH 24/30] make update --- NOTICE.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/NOTICE.txt b/NOTICE.txt index bb5807f9a41..4447873499f 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -23112,6 +23112,38 @@ Contents of probable licence file $GOMODCACHE/github.com/xdg-go/scram@v1.1.2/LIC of your accepting any such warranty or additional liability. 
+-------------------------------------------------------------------------------- +Dependency : github.com/zyedidia/generic +Version: v1.2.1 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/zyedidia/generic@v1.2.1/LICENSE: + +MIT License + +Copyright (c) 2021: Zachary Yedidia. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------------- Dependency : go.elastic.co/apm/module/apmelasticsearch/v2 Version: v2.6.0 From 48025b5ac21c15f5b28d43133470e27ef3acaab7 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 15 Oct 2024 15:24:07 -0400 Subject: [PATCH 25/30] linter / CI fixes --- x-pack/filebeat/input/awss3/config.go | 1 + x-pack/filebeat/input/awss3/s3.go | 1 + x-pack/filebeat/input/awss3/s3_objects.go | 2 +- .../filebeat/input/awss3/s3_objects_test.go | 23 ++++--------- x-pack/filebeat/input/awss3/sqs_s3_event.go | 8 +++-- x-pack/filebeat/input/awss3/sqs_test.go | 32 +++++++++++++++++-- 6 files changed, 44 insertions(+), 23 deletions(-) diff --git a/x-pack/filebeat/input/awss3/config.go b/x-pack/filebeat/input/awss3/config.go index 44664810e85..d80108590ce 100644 --- a/x-pack/filebeat/input/awss3/config.go +++ b/x-pack/filebeat/input/awss3/config.go @@ -246,6 +246,7 @@ func (c config) getBucketARN() string { // Should be provided as a parameter to s3.NewFromConfig. 
func (c config) s3ConfigModifier(o *s3.Options) { if c.NonAWSBucketName != "" { + //nolint:staticcheck // haven't migrated to the new interface yet o.EndpointResolver = nonAWSBucketResolver{endpoint: c.AWSConfig.Endpoint} } diff --git a/x-pack/filebeat/input/awss3/s3.go b/x-pack/filebeat/input/awss3/s3.go index e16d45bf84f..9901d5fe41d 100644 --- a/x-pack/filebeat/input/awss3/s3.go +++ b/x-pack/filebeat/input/awss3/s3.go @@ -116,5 +116,6 @@ type nonAWSBucketResolver struct { } func (n nonAWSBucketResolver) ResolveEndpoint(region string, options s3.EndpointResolverOptions) (awssdk.Endpoint, error) { + //nolint:staticcheck // haven't migrated to the new interface yet return awssdk.Endpoint{URL: n.endpoint, SigningRegion: region, HostnameImmutable: true, Source: awssdk.EndpointSourceCustom}, nil } diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 5d73d8980fd..93219d9a640 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -167,7 +167,7 @@ func (p *s3ObjectProcessor) ProcessS3Object(log *logp.Logger, eventCallback func for dec.next() { val, err := dec.decodeValue() if err != nil { - if err == io.EOF { + if errors.Is(err, io.EOF) { return nil } break diff --git a/x-pack/filebeat/input/awss3/s3_objects_test.go b/x-pack/filebeat/input/awss3/s3_objects_test.go index 45dd03c47c4..d20d81ced6c 100644 --- a/x-pack/filebeat/input/awss3/s3_objects_test.go +++ b/x-pack/filebeat/input/awss3/s3_objects_test.go @@ -22,7 +22,6 @@ import ( "github.com/stretchr/testify/require" "github.com/elastic/beats/v7/libbeat/beat" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" ) @@ -187,22 +186,20 @@ func TestS3ObjectProcessor(t *testing.T) { ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := NewMockBeatClient(ctrl) s3Event, s3Resp := newS3Object(t, "testdata/log.txt", "") - var events []beat.Event gomock.InOrder( mockS3API.EXPECT(). GetObject(gomock.Any(), gomock.Eq("us-east-1"), gomock.Eq(s3Event.S3.Bucket.Name), gomock.Eq(s3Event.S3.Object.Key)). Return(s3Resp, nil), - mockPublisher.EXPECT(). - Publish(gomock.Any()). - Do(func(event beat.Event) { events = append(events, event) }). - Times(2), ) + var events []beat.Event s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, nil, backupConfig{}) - err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object(logp.NewLogger(inputName), func(_ beat.Event) {}) + err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object(logp.NewLogger(inputName), func(event beat.Event) { + events = append(events, event) + }) + assert.Equal(t, 2, len(events)) require.NoError(t, err) }) @@ -309,7 +306,6 @@ func _testProcessS3Object(t testing.TB, file, contentType string, numEvents int, ctrl, ctx := gomock.WithContext(ctx, t) defer ctrl.Finish() mockS3API := NewMockS3API(ctrl) - mockPublisher := NewMockBeatClient(ctrl) s3Event, s3Resp := newS3Object(t, file, contentType) var events []beat.Event @@ -317,21 +313,16 @@ func _testProcessS3Object(t testing.TB, file, contentType string, numEvents int, mockS3API.EXPECT(). GetObject(gomock.Any(), gomock.Eq("us-east-1"), gomock.Eq(s3Event.S3.Bucket.Name), gomock.Eq(s3Event.S3.Object.Key)). Return(s3Resp, nil), - mockPublisher.EXPECT(). - Publish(gomock.Any()). - Do(func(event beat.Event) { events = append(events, event) }). 
- Times(numEvents), ) s3ObjProc := newS3ObjectProcessorFactory(nil, mockS3API, selectors, backupConfig{}) - ack := awscommon.NewEventACKTracker(ctx) err := s3ObjProc.Create(ctx, s3Event).ProcessS3Object( - logp.NewLogger(inputName), func(_ beat.Event) {}) + logp.NewLogger(inputName), + func(event beat.Event) { events = append(events, event) }) if !expectErr { require.NoError(t, err) assert.Equal(t, numEvents, len(events)) - assert.EqualValues(t, numEvents, ack.PendingACKs) } else { require.Error(t, err) } diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event.go b/x-pack/filebeat/input/awss3/sqs_s3_event.go index 96ab9f1d42f..884cf7adbbc 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event.go @@ -151,7 +151,6 @@ func newSQSS3EventProcessor( } type sqsProcessingResult struct { - log *logp.Logger processor *sqsS3EventProcessor msg *types.Message receiveCount int // How many times this SQS object has been read @@ -201,7 +200,6 @@ func (p *sqsS3EventProcessor) ProcessSQS(ctx context.Context, msg *types.Message }) return sqsProcessingResult{ - log: p.log, msg: msg, processor: p, receiveCount: receiveCount, @@ -228,7 +226,11 @@ func (r sqsProcessingResult) Done() { p.log.Errorf("failed deleting message from SQS queue (it may be reprocessed): %v", msgDelErr.Error()) return } - p.metrics.sqsMessagesDeletedTotal.Inc() + if p.metrics != nil { + // This nil check always passes in production, but it's nice when unit + // tests don't have to initialize irrelevant fields + p.metrics.sqsMessagesDeletedTotal.Inc() + } // SQS message finished and deleted, finalize s3 objects if finalizeErr := r.finalizeS3Objects(); finalizeErr != nil { p.log.Errorf("failed finalizing message from SQS queue (manual cleanup is required): %v", finalizeErr.Error()) diff --git a/x-pack/filebeat/input/awss3/sqs_test.go b/x-pack/filebeat/input/awss3/sqs_test.go index eec2b67ec2c..ab347bd2d29 100644 --- a/x-pack/filebeat/input/awss3/sqs_test.go +++ b/x-pack/filebeat/input/awss3/sqs_test.go @@ -19,6 +19,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/elastic-agent-libs/logp" ) @@ -72,19 +73,43 @@ func TestSQSReceiver(t *testing.T) { return map[string]string{sqsApproximateNumberOfMessages: "10000"}, nil }).AnyTimes() + mockSQS.EXPECT(). + DeleteMessage(gomock.Any(), gomock.Any()).Times(1).Do( + func(_ context.Context, _ *types.Message) { + cancel() + }) + + logger := logp.NewLogger(inputName) + // Expect the one message returned to have been processed. mockMsgHandler.EXPECT(). ProcessSQS(gomock.Any(), gomock.Eq(&msg), gomock.Any()). Times(1). - Return(nil) + DoAndReturn( + func(_ context.Context, _ *types.Message, _ func(e beat.Event)) sqsProcessingResult { + return sqsProcessingResult{ + keepaliveCancel: func() {}, + processor: &sqsS3EventProcessor{ + log: logger, + sqs: mockSQS, + }, + } + }) // Execute sqsReader and verify calls/state. 
sqsReader := newSQSReaderInput(config{NumberOfWorkers: workerCount}, aws.Config{}) - sqsReader.log = logp.NewLogger(inputName) + sqsReader.log = logger sqsReader.sqs = mockSQS - sqsReader.msgHandler = mockMsgHandler sqsReader.metrics = newInputMetrics("", nil, 0) + sqsReader.pipeline = &fakePipeline{} + sqsReader.msgHandler = mockMsgHandler sqsReader.run(ctx) + + select { + case <-ctx.Done(): + case <-time.After(time.Second): + require.Fail(t, "Never observed SQS DeleteMessage call") + } }) t.Run("retry after ReceiveMessage error", func(t *testing.T) { @@ -125,6 +150,7 @@ func TestSQSReceiver(t *testing.T) { sqsReader.sqs = mockSQS sqsReader.msgHandler = mockMsgHandler sqsReader.metrics = newInputMetrics("", nil, 0) + sqsReader.pipeline = &fakePipeline{} sqsReader.run(ctx) }) } From 10ecfd3358c2eae75ef073a67b53eb2c90281672 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 15 Oct 2024 16:39:36 -0400 Subject: [PATCH 26/30] Fix benchmark test ack counting --- x-pack/filebeat/input/awss3/acks.go | 4 +++- .../input/awss3/input_benchmark_test.go | 22 ++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/x-pack/filebeat/input/awss3/acks.go b/x-pack/filebeat/input/awss3/acks.go index f4817f06db8..a3850c01e87 100644 --- a/x-pack/filebeat/input/awss3/acks.go +++ b/x-pack/filebeat/input/awss3/acks.go @@ -93,7 +93,9 @@ func (ah *awsACKHandler) run() { // Run finalization asynchronously so we don't block the SQS worker // or the queue by ignoring the ack handler's input channels. Ordering // is no longer important at this point. - go result.ackCallback() + if result.ackCallback != nil { + go result.ackCallback() + } } // If the input is closed and all acks are completed, we're done diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index ce3708fb16b..6d8288ceafa 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -164,10 +164,18 @@ func (c constantS3) ListObjectsPaginator(string, string) s3Pager { var _ beat.Pipeline = (*fakePipeline)(nil) // fakePipeline returns new ackClients. -type fakePipeline struct{} +type fakePipeline struct { + ackHandler *awsACKHandler +} + +func newFakePipeline() *fakePipeline { + return &fakePipeline{ackHandler: newAWSACKHandler()} +} func (c *fakePipeline) ConnectWith(beat.ClientConfig) (beat.Client, error) { - return &ackClient{}, nil + return &ackClient{ + ackHandler: c.ackHandler, + }, nil } func (c *fakePipeline) Connect() (beat.Client, error) { @@ -177,13 +185,14 @@ func (c *fakePipeline) Connect() (beat.Client, error) { var _ beat.Client = (*ackClient)(nil) // ackClient is a fake beat.Client that ACKs the published messages. -type ackClient struct{} +type ackClient struct { + ackHandler *awsACKHandler +} func (c *ackClient) Close() error { return nil } func (c *ackClient) Publish(event beat.Event) { - // Fake the ACK handling. 
- event.Private.(*awscommon.EventACKTracker).ACK() + c.ackHandler.Add(1, nil) } func (c *ackClient) PublishAll(event []beat.Event) { @@ -216,6 +225,7 @@ func benchmarkInputSQS(t *testing.T, workerCount int) testing.BenchmarkResult { config.NumberOfWorkers = workerCount sqsReader := newSQSReaderInput(config, aws.Config{}) sqsReader.log = log.Named("sqs") + sqsReader.pipeline = newFakePipeline() sqsReader.metrics = newInputMetrics("test_id", monitoring.NewRegistry(), workerCount) sqsReader.sqs, err = newConstantSQS() require.NoError(t, err) @@ -306,7 +316,7 @@ func benchmarkInputS3(t *testing.T, numberOfWorkers int) testing.BenchmarkResult client := pubtest.NewChanClientWithCallback(100, func(event beat.Event) { event.Private.(*awscommon.EventACKTracker).ACK() }) - pipeline := pubtest.PublisherWithClient(client) + pipeline := newFakePipeline() defer func() { _ = client.Close() From 7a38f55b639f8d5e1f2ab9a79435703952e6aa12 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 15 Oct 2024 16:58:27 -0400 Subject: [PATCH 27/30] edit changelog --- CHANGELOG.next.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index ec38f36931d..45ad14f6029 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -46,7 +46,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Added `container.image.name` to `journald` Filebeat input's Docker-specific translated fields. {pull}40450[40450] - Change log.file.path field in awscloudwatch input to nested object. {pull}41099[41099] - Remove deprecated awscloudwatch field from Filebeat. {pull}41089[41089] -- `max_number_of_messages` config for S3 input's SQS mode is now ignored. Instead use `number_of_workers` to scale ingestion rate in both S3 and SQS modes. {pull}40699[40699] +- The performance of ingesting SQS data with the S3 input has improved by up to 60x for queues with many small events. `max_number_of_messages` config for SQS mode is now ignored, as the new design no longer needs a manual cap on messages. Instead, use `number_of_workers` to scale ingestion rate in both S3 and SQS modes. The increased efficiency may increase network bandwidth consumption, which can be throttled by lowering `number_of_workers`. It may also increase number of events stored in memory, which can be throttled by lowering the configured size of the internal queue. {pull}40699[40699] - System module events now contain `input.type: systemlogs` instead of `input.type: log` when harvesting log files. 
{pull}41061[41061] From 84cb4224fbc7fdffafd38ebfb6e947c80c6f1ed2 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 15 Oct 2024 19:00:42 -0400 Subject: [PATCH 28/30] more mock test fixes --- .../input/awss3/input_benchmark_test.go | 22 +++++-------------- x-pack/filebeat/input/awss3/s3_input.go | 4 ++-- x-pack/filebeat/input/awss3/s3_test.go | 8 +++---- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index 6d8288ceafa..54e22773602 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -27,8 +27,6 @@ import ( "github.com/dustin/go-humanize" "github.com/olekukonko/tablewriter" - pubtest "github.com/elastic/beats/v7/libbeat/publisher/testing" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/monitoring" @@ -165,16 +163,15 @@ var _ beat.Pipeline = (*fakePipeline)(nil) // fakePipeline returns new ackClients. type fakePipeline struct { - ackHandler *awsACKHandler } func newFakePipeline() *fakePipeline { - return &fakePipeline{ackHandler: newAWSACKHandler()} + return &fakePipeline{} } -func (c *fakePipeline) ConnectWith(beat.ClientConfig) (beat.Client, error) { +func (c *fakePipeline) ConnectWith(config beat.ClientConfig) (beat.Client, error) { return &ackClient{ - ackHandler: c.ackHandler, + eventListener: config.EventListener, }, nil } @@ -186,13 +183,14 @@ var _ beat.Client = (*ackClient)(nil) // ackClient is a fake beat.Client that ACKs the published messages. type ackClient struct { - ackHandler *awsACKHandler + eventListener beat.EventListener } func (c *ackClient) Close() error { return nil } func (c *ackClient) Publish(event beat.Event) { - c.ackHandler.Add(1, nil) + c.eventListener.AddEvent(event, true) + go c.eventListener.ACKEvents(1) } func (c *ackClient) PublishAll(event []beat.Event) { @@ -312,16 +310,8 @@ func benchmarkInputS3(t *testing.T, numberOfWorkers int) testing.BenchmarkResult metricRegistry := monitoring.NewRegistry() metrics := newInputMetrics("test_id", metricRegistry, numberOfWorkers) - - client := pubtest.NewChanClientWithCallback(100, func(event beat.Event) { - event.Private.(*awscommon.EventACKTracker).ACK() - }) pipeline := newFakePipeline() - defer func() { - _ = client.Close() - }() - config := makeBenchmarkConfig(t) config.NumberOfWorkers = numberOfWorkers diff --git a/x-pack/filebeat/input/awss3/s3_input.go b/x-pack/filebeat/input/awss3/s3_input.go index 7d72f3753a9..c3a83c284a2 100644 --- a/x-pack/filebeat/input/awss3/s3_input.go +++ b/x-pack/filebeat/input/awss3/s3_input.go @@ -120,9 +120,9 @@ func (in *s3PollerInput) runPoll(ctx context.Context) { } func (in *s3PollerInput) workerLoop(ctx context.Context, workChan <-chan state) { - var acks awsACKHandler + acks := newAWSACKHandler() // Create client for publishing events and receive notification of their ACKs. 
- client, err := createPipelineClient(in.pipeline, &acks) + client, err := createPipelineClient(in.pipeline, acks) if err != nil { in.log.Errorf("failed to create pipeline client: %v", err.Error()) return diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go index 11f12ee3a24..b0b19d82831 100644 --- a/x-pack/filebeat/input/awss3/s3_test.go +++ b/x-pack/filebeat/input/awss3/s3_test.go @@ -36,7 +36,7 @@ func TestS3Poller(t *testing.T) { defer ctrl.Finish() mockAPI := NewMockS3API(ctrl) mockPager := NewMockS3Pager(ctrl) - mockPipeline := NewMockBeatPipeline(ctrl) + pipeline := newFakePipeline() gomock.InOrder( mockAPI.EXPECT(). @@ -139,7 +139,7 @@ func TestS3Poller(t *testing.T) { RegionName: "region", }, s3: mockAPI, - pipeline: mockPipeline, + pipeline: pipeline, s3ObjectHandler: s3ObjProc, states: states, provider: "provider", @@ -162,7 +162,7 @@ func TestS3Poller(t *testing.T) { mockS3 := NewMockS3API(ctrl) mockErrorPager := NewMockS3Pager(ctrl) mockSuccessPager := NewMockS3Pager(ctrl) - mockPipeline := NewMockBeatPipeline(ctrl) + pipeline := newFakePipeline() gomock.InOrder( // Initial ListObjectPaginator gets an error. @@ -277,7 +277,7 @@ func TestS3Poller(t *testing.T) { RegionName: "region", }, s3: mockS3, - pipeline: mockPipeline, + pipeline: pipeline, s3ObjectHandler: s3ObjProc, states: states, provider: "provider", From b088ca7378ff99bce7d3059fe1631b204a764ac7 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 15 Oct 2024 19:18:19 -0400 Subject: [PATCH 29/30] fix more mock details --- .../filebeat/input/awss3/sqs_s3_event_test.go | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/x-pack/filebeat/input/awss3/sqs_s3_event_test.go b/x-pack/filebeat/input/awss3/sqs_s3_event_test.go index dafadf68f42..c7962bb2f0f 100644 --- a/x-pack/filebeat/input/awss3/sqs_s3_event_test.go +++ b/x-pack/filebeat/input/awss3/sqs_s3_event_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/require" "github.com/elastic/beats/v7/libbeat/beat" - awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/go-concert/timed" ) @@ -40,19 +39,16 @@ func TestSQSS3EventProcessor(t *testing.T) { defer ctrl.Finish() mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) - mockClient := NewMockBeatClient(ctrl) - mockBeatPipeline := NewMockBeatPipeline(ctrl) gomock.InOrder( - mockBeatPipeline.EXPECT().ConnectWith(gomock.Any()).Return(mockClient, nil), mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any()).Return(nil), - mockClient.EXPECT().Close(), mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), ) p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) require.NoError(t, result.processingErr) + result.Done() }) t.Run("invalid SQS JSON body does not retry", func(t *testing.T) { @@ -71,14 +67,13 @@ func TestSQSS3EventProcessor(t *testing.T) { body = body[10:] invalidBodyMsg.Body = &body - gomock.InOrder( - mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&invalidBodyMsg)).Return(nil), - ) + mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&invalidBodyMsg)).Return(nil) p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) result := p.ProcessSQS(ctx, &invalidBodyMsg, func(_ beat.Event) {}) require.Error(t, 
result.processingErr) t.Log(result.processingErr) + result.Done() }) t.Run("zero S3 events in body", func(t *testing.T) { @@ -93,13 +88,12 @@ func TestSQSS3EventProcessor(t *testing.T) { emptyRecordsMsg, err := newSQSMessage([]s3EventV2{}...) require.NoError(t, err) - gomock.InOrder( - mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&emptyRecordsMsg)).Return(nil), - ) + mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&emptyRecordsMsg)).Return(nil) p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, time.Minute, 5, mockS3HandlerFactory) result := p.ProcessSQS(ctx, &emptyRecordsMsg, func(_ beat.Event) {}) require.NoError(t, result.processingErr) + result.Done() }) t.Run("visibility is extended after half expires", func(t *testing.T) { @@ -113,17 +107,15 @@ func TestSQSS3EventProcessor(t *testing.T) { mockAPI := NewMockSQSAPI(ctrl) mockS3HandlerFactory := NewMockS3ObjectHandlerFactory(ctrl) mockS3Handler := NewMockS3ObjectHandler(ctrl) - mockClient := NewMockBeatClient(ctrl) mockAPI.EXPECT().ChangeMessageVisibility(gomock.Any(), gomock.Eq(&msg), gomock.Eq(visibilityTimeout)).AnyTimes().Return(nil) gomock.InOrder( mockS3HandlerFactory.EXPECT().Create(gomock.Any(), gomock.Any()). - Do(func(ctx context.Context, _ *logp.Logger, _ beat.Client, _ *awscommon.EventACKTracker, _ s3EventV2) { + Do(func(ctx context.Context, _ s3EventV2) { require.NoError(t, timed.Wait(ctx, 5*visibilityTimeout)) }).Return(mockS3Handler), mockS3Handler.EXPECT().ProcessS3Object(gomock.Any(), gomock.Any()).Return(nil), - mockClient.EXPECT().Close(), mockAPI.EXPECT().DeleteMessage(gomock.Any(), gomock.Eq(&msg)).Return(nil), mockS3Handler.EXPECT().FinalizeS3Object().Return(nil), ) @@ -131,6 +123,7 @@ func TestSQSS3EventProcessor(t *testing.T) { p := newSQSS3EventProcessor(logp.NewLogger(inputName), nil, mockAPI, nil, visibilityTimeout, 5, mockS3HandlerFactory) result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) require.NoError(t, result.processingErr) + result.Done() }) t.Run("message returns to queue on error", func(t *testing.T) { @@ -152,6 +145,7 @@ func TestSQSS3EventProcessor(t *testing.T) { result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) t.Log(result.processingErr) require.Error(t, result.processingErr) + result.Done() }) t.Run("message is deleted after multiple receives", func(t *testing.T) { @@ -179,6 +173,7 @@ func TestSQSS3EventProcessor(t *testing.T) { result := p.ProcessSQS(ctx, &msg, func(_ beat.Event) {}) t.Log(result.eventCount) require.Error(t, result.processingErr) + result.Done() }) } From bbbd1f986fa445bb54e50a44188f1cb66c47b4d6 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 15 Oct 2024 21:13:28 -0400 Subject: [PATCH 30/30] fix race condition in mock sequencing --- x-pack/filebeat/input/awss3/sqs_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/filebeat/input/awss3/sqs_test.go b/x-pack/filebeat/input/awss3/sqs_test.go index ab347bd2d29..8bc25397eae 100644 --- a/x-pack/filebeat/input/awss3/sqs_test.go +++ b/x-pack/filebeat/input/awss3/sqs_test.go @@ -62,8 +62,6 @@ func TestSQSReceiver(t *testing.T) { ReceiveMessage(gomock.Any(), gomock.Any()). Times(1). DoAndReturn(func(_ context.Context, _ int) ([]types.Message, error) { - // Stop the test. - cancel() return nil, nil })
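
Editor's note — for readers following the ack-handling changes in this series, the sketch below is a self-contained, simplified illustration of the in-order completion-callback pattern that the awsACKHandler and sqsProcessingResult.Done hunks above rely on. It is not the Beats pipeline API: the names here (ackTracker, ackBatch, Add, ACKEvents) are stand-ins invented for the example, and the real handler additionally runs its callbacks asynchronously and handles input shutdown.

package main

import (
	"fmt"
	"sync"
)

// ackBatch records how many published events one source message produced and
// what to do once all of them have been acknowledged.
type ackBatch struct {
	pending int
	onDone  func()
}

// ackTracker is a deliberately simplified stand-in for the ACK handler in the
// hunks above: batches complete strictly in the order they were added, and a
// batch's callback runs only after all of its events have been acked.
type ackTracker struct {
	mu      sync.Mutex
	batches []ackBatch
}

// Add registers a new batch. eventCount is the number of events published for
// one source message; onDone runs once they have all been acknowledged.
func (t *ackTracker) Add(eventCount int, onDone func()) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.batches = append(t.batches, ackBatch{pending: eventCount, onDone: onDone})
}

// ACKEvents is what a pipeline ACK listener would call as published events are
// acknowledged. It drains completed batches from the front of the queue, so
// completion callbacks fire in publish order.
func (t *ackTracker) ACKEvents(n int) {
	t.mu.Lock()
	defer t.mu.Unlock()
	for n > 0 && len(t.batches) > 0 {
		b := &t.batches[0]
		if b.pending > n {
			b.pending -= n
			return
		}
		n -= b.pending
		if b.onDone != nil {
			b.onDone()
		}
		t.batches = t.batches[1:]
	}
}

func main() {
	tracker := &ackTracker{}

	// Message 1 produced three events, message 2 produced one.
	tracker.Add(3, func() { fmt.Println("message 1 fully acked: delete from SQS, finalize S3 object") })
	tracker.Add(1, func() { fmt.Println("message 2 fully acked: delete from SQS, finalize S3 object") })

	// Simulate the pipeline acknowledging events in arbitrary-sized chunks.
	tracker.ACKEvents(2) // nothing completes yet
	tracker.ACKEvents(2) // completes message 1, then message 2, in order
}

The point of draining from the front of the queue is that a source message's completion callback only fires after every event it produced has been acknowledged by the publishing pipeline, which is the property that lets Done() safely delete the SQS message and finalize its S3 objects in the hunks above.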