Skip to content

Commit

Permalink
fix: list objects trim common prefixes that match marker prefix
Browse files Browse the repository at this point in the history
This checks to see if the common prefix is before the marker and
thus would have been returned in earlier list objects request.

The error case was aws cli listing multiple entries for the same
common prefix when the listing required multiple pagination
requests.

Fixes #778
  • Loading branch information
benmcclelland committed Sep 18, 2024
1 parent 221440e commit 180df62
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 1 deletion.
9 changes: 8 additions & 1 deletion backend/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,19 @@ func Walk(ctx context.Context, fileSystem fs.FS, prefix, delimiter, marker strin

// Common prefixes are a set, so should not have duplicates.
// These are abstractly a "directory", so need to include the
// delimiter at the end.
// delimiter at the end when we add to the map.
cprefNoDelim := prefix + before
cpref := prefix + before + delimiter
if cpref == marker {
pastMarker = true
return nil
}

if marker != "" && strings.HasPrefix(marker, cprefNoDelim) {
// skip common prefixes that are before the marker
return nil
}

cpmap[cpref] = struct{}{}
if (len(objects) + len(cpmap)) == int(max) {
newMarker = cpref
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/group-tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ func TestListObjects(s *S3Conf) {
ListObjects_non_existing_bucket(s)
ListObjects_with_prefix(s)
ListObjects_truncated(s)
ListObjects_paginated(s)
ListObjects_invalid_max_keys(s)
ListObjects_max_keys_0(s)
ListObjects_delimiter(s)
Expand Down Expand Up @@ -612,6 +613,7 @@ func GetIntTests() IntTests {
"ListObjects_non_existing_bucket": ListObjects_non_existing_bucket,
"ListObjects_with_prefix": ListObjects_with_prefix,
"ListObjects_truncated": ListObjects_truncated,
"ListObjects_paginated": ListObjects_paginated,
"ListObjects_invalid_max_keys": ListObjects_invalid_max_keys,
"ListObjects_max_keys_0": ListObjects_max_keys_0,
"ListObjects_delimiter": ListObjects_delimiter,
Expand Down
27 changes: 27 additions & 0 deletions tests/integration/tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -3590,6 +3590,33 @@ func ListObjects_with_prefix(s *S3Conf) error {
})
}

func ListObjects_paginated(s *S3Conf) error {
testName := "ListObjects_paginated"
return actionHandler(s, testName, func(s3client *s3.Client, bucket string) error {
_, err := putObjects(s3client, []string{"dir1/subdir/file.txt", "dir1/subdir.ext", "dir1/subdir1.ext", "dir1/subdir2.ext"}, bucket)
if err != nil {
return err
}

objs, prefixes, err := listObjects(s3client, bucket, "dir1/", "/", 2)
if err != nil {
return err
}

expected := []string{"dir1/subdir.ext", "dir1/subdir1.ext", "dir1/subdir2.ext"}
if !hasObjNames(objs, expected) {
return fmt.Errorf("expected objects %v, instead got %v", expected, objs)
}

expectedPrefix := []string{"dir1/subdir/"}
if !hasPrefixName(prefixes, expectedPrefix) {
return fmt.Errorf("expected prefixes %v, instead got %v", expectedPrefix, prefixes)
}

return nil
})
}

func ListObjects_truncated(s *S3Conf) error {
testName := "ListObjects_truncated"
return actionHandler(s, testName, func(s3client *s3.Client, bucket string) error {
Expand Down
70 changes: 70 additions & 0 deletions tests/integration/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,76 @@ func putObjects(client *s3.Client, objs []string, bucket string) ([]types.Object
return contents, nil
}

func listObjects(client *s3.Client, bucket, prefix, delimiter string, maxKeys int32) ([]types.Object, []types.CommonPrefix, error) {
var contents []types.Object
var commonPrefixes []types.CommonPrefix

var continuationToken *string

for {
ctx, cancel := context.WithTimeout(context.Background(), shortTimeout)
res, err := client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{
Bucket: &bucket,
ContinuationToken: continuationToken,
Prefix: &prefix,
Delimiter: &delimiter,
MaxKeys: &maxKeys,
})
cancel()
if err != nil {
return nil, nil, err
}
contents = append(contents, res.Contents...)
commonPrefixes = append(commonPrefixes, res.CommonPrefixes...)
continuationToken = res.NextContinuationToken

if !*res.IsTruncated {
break
}
}

return contents, commonPrefixes, nil
}

func hasObjNames(objs []types.Object, names []string) bool {
if len(objs) != len(names) {
return false
}

for _, obj := range objs {
if contains(names, *obj.Key) {
continue
}
return false
}

return true
}

func hasPrefixName(prefixes []types.CommonPrefix, names []string) bool {
if len(prefixes) != len(names) {
return false
}

for _, prefix := range prefixes {
if contains(names, *prefix.Prefix) {
continue
}
return false
}

return true
}

func contains(s []string, e string) bool {
for _, a := range s {
if a == e {
return true
}
}
return false
}

func putObjectWithData(lgth int64, input *s3.PutObjectInput, client *s3.Client) (csum [32]byte, data []byte, err error) {
data = make([]byte, lgth)
rand.Read(data)
Expand Down

0 comments on commit 180df62

Please sign in to comment.