From 80204e857242c16d61ce82c7113aa0595a46de1d Mon Sep 17 00:00:00 2001 From: Himadri Bhattacharjee Date: Thu, 14 Sep 2023 14:39:21 +0530 Subject: [PATCH 1/4] feat: deduplicate bucket names when ingesting from file --- bucket/bucket.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bucket/bucket.go b/bucket/bucket.go index 001b77a..3ecbd3d 100644 --- a/bucket/bucket.go +++ b/bucket/bucket.go @@ -155,21 +155,27 @@ func ReadFromFile(bucketFile string, bucketChan chan Bucket) error { } defer file.Close() + bucketsSeen := make(map[string]struct{}) fileScanner := bufio.NewScanner(file) for fileScanner.Scan() { bucketName := strings.TrimSpace(fileScanner.Text()) if !IsValidS3BucketName(bucketName) { log.Info(fmt.Sprintf("invalid | %s", bucketName)) - } else { - bucketChan <- NewBucket(strings.ToLower(bucketName)) + continue } + bucketName = strings.ToLower(bucketName) + if _, seen := bucketsSeen[bucketName]; seen { + continue + } + bucketsSeen[bucketName] = struct{}{} + bucketChan <- NewBucket(bucketName) } if ferr := fileScanner.Err(); ferr != nil { return ferr } - return err + return nil } // ParseAclOutputv2 TODO: probably move this to providers.go From 33503fe235b9a660e87e3ee510906287ff822047 Mon Sep 17 00:00:00 2001 From: Himadri Bhattacharjee Date: Sat, 16 Sep 2023 08:04:17 +0530 Subject: [PATCH 2/4] refactor: move reading buckets from a reader into its own function This will aid testing the function without having to create a file --- bucket/bucket.go | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/bucket/bucket.go b/bucket/bucket.go index 3ecbd3d..a529e86 100644 --- a/bucket/bucket.go +++ b/bucket/bucket.go @@ -3,15 +3,17 @@ package bucket import ( "bufio" "fmt" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/sa7mon/s3scanner/groups" - log "github.com/sirupsen/logrus" + "io" "os" "regexp" "strings" "time" "unicode" + + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/sa7mon/s3scanner/groups" + log "github.com/sirupsen/logrus" ) var BucketExists = uint8(1) @@ -148,17 +150,11 @@ func (bucket *Bucket) Permissions() map[*types.Grantee]map[string]uint8 { } } -func ReadFromFile(bucketFile string, bucketChan chan Bucket) error { - file, err := os.Open(bucketFile) - if err != nil { - return err - } - defer file.Close() - +func FromReader(r io.Reader, bucketChan chan Bucket) error { + scanner := bufio.NewScanner(r) bucketsSeen := make(map[string]struct{}) - fileScanner := bufio.NewScanner(file) - for fileScanner.Scan() { - bucketName := strings.TrimSpace(fileScanner.Text()) + for scanner.Scan() { + bucketName := strings.TrimSpace(scanner.Text()) if !IsValidS3BucketName(bucketName) { log.Info(fmt.Sprintf("invalid | %s", bucketName)) continue @@ -171,10 +167,22 @@ func ReadFromFile(bucketFile string, bucketChan chan Bucket) error { bucketChan <- NewBucket(bucketName) } - if ferr := fileScanner.Err(); ferr != nil { + if ferr := scanner.Err(); ferr != nil { return ferr } + return nil +} +func ReadFromFile(bucketFile string, bucketChan chan Bucket) error { + file, err := os.Open(bucketFile) + if err != nil { + return err + } + defer file.Close() + + if err := FromReader(file, bucketChan); err != nil { + return err + } return nil } From 7693b6cda363e118060ca7b4b70592ad442ae7fa Mon Sep 17 00:00:00 2001 From: Himadri Bhattacharjee Date: Sat, 16 Sep 2023 08:04:52 +0530 Subject: [PATCH 3/4] feat: add tests for bucket reader function --- bucket/bucket_test.go | 45 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/bucket/bucket_test.go b/bucket/bucket_test.go index f8e6386..6d8c3c8 100644 --- a/bucket/bucket_test.go +++ b/bucket/bucket_test.go @@ -3,15 +3,17 @@ package bucket import ( "context" "fmt" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/sa7mon/s3scanner/groups" "github.com/stretchr/testify/assert" "golang.org/x/sync/errgroup" - "path/filepath" - "runtime" - "testing" - "time" ) func TestIsValidS3BucketName_Good(t *testing.T) { @@ -262,6 +264,41 @@ func TestBucket_ParseAclOutputv2(t *testing.T) { } } +func TestFromReader(t *testing.T) { + t.Parallel() + + reader := strings.NewReader(`test +bar +bucket +bar +test +foo +bucket +foo +bar`) + + testChan := make(chan Bucket) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + g, _ := errgroup.WithContext(ctx) + defer cancel() + + g.Go(func() error { + err := FromReader(reader, testChan) + close(testChan) + return err + }) + + i := 0 + for range testChan { + i++ + } + assert.Equal(t, 4, i) + + if err := g.Wait(); err != nil { + t.Error(err) + } +} + func TestReadFromFile(t *testing.T) { t.Parallel() From 281aa48156cabfa95fa7fefc841a4f1a3440ab05 Mon Sep 17 00:00:00 2001 From: Himadri Bhattacharjee Date: Tue, 19 Sep 2023 15:12:08 +0530 Subject: [PATCH 4/4] doc: documents bucket name deduplication in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index df65d06..2ac8473 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,8 @@ assets image-uploads ``` +Bucket names listed multiple times will only be scanned once. + *`-mq`* -------