Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added ListObjectsV2 call #64

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 64 additions & 2 deletions src/AWSS3.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export S3Path, s3_arn, s3_put, s3_get, s3_get_file, s3_exists, s3_delete, s3_cop
s3_create_bucket,
s3_put_cors,
s3_enable_versioning, s3_delete_bucket, s3_list_buckets,
s3_list_objects, s3_list_keys, s3_list_versions,
s3_list_objects, s3_list_objects_v2, s3_list_keys, s3_list_versions,
s3_get_meta, s3_purge_versions,
s3_sign_url, s3_begin_multipart_upload, s3_upload_part,
s3_complete_multipart_upload, s3_multipart_upload,
Expand Down Expand Up @@ -538,8 +538,70 @@ function s3_list_objects(aws::AWSConfig, bucket, path_prefix=""; delimiter="/",
end
end

# Convenience method: prepends `default_aws_config()` and forwards the positional arguments.
# NOTE(review): this appears to be the pre-change side of the diff hunk; keyword arguments
# are not forwarded by this form — confirm against the actual file.
s3_list_objects(a...) = s3_list_objects(default_aws_config(), a...)
# Convenience method: prepends `default_aws_config()` and forwards both the positional and
# the keyword arguments unchanged.
s3_list_objects(a...; kwargs...) = s3_list_objects(default_aws_config(), a...; kwargs...)

"""
s3_list_objects_v2([::AWSConfig], bucket, [path_prefix]; delimiter="/", max_items=1000)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
s3_list_objects_v2([::AWSConfig], bucket, [path_prefix]; delimiter="/", max_items=1000)
s3_list_objects_v2([::AWSConfig], bucket, [path_prefix]; delimiter="/", start_after="", max_items=1000)


[List Objects V2](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html)
in `bucket` with optional `path_prefix`.

Returns an iterator of `Dict`s with keys `Key`, `LastModified`, `ETag`, `Size`,
`Owner`, `StorageClass`.

This uses the `ListObjectsV2` API call.
"""
function s3_list_objects_v2(aws::AWSS3.AWSConfig, bucket, path_prefix=""; delimiter="/", max_items=nothing)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
function s3_list_objects_v2(aws::AWSS3.AWSConfig, bucket, path_prefix=""; delimiter="/", max_items=nothing)
function s3_list_objects_v2(aws::AWSS3.AWSConfig, bucket, path_prefix=""; delimiter="/", start_after="", max_items=nothing)

return Channel() do chnl
more = true
num_objects = 0
contoken = ""

while more
q = Dict{String, String}("list-type"=>"2")
if path_prefix != ""
q["prefix"] = path_prefix
end
if delimiter != ""
q["delimiter"] = delimiter
end
if contoken ≠ ""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For consistency we should probably keep using `!=` instead of `≠`.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if contoken ≠ ""
if contoken != ""

q["continuation-token"] = contoken
end
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
end
end
if start_after != ""
q["start-after"] = start_after
end

if max_items !== nothing
q["max-keys"] = string(max_items - num_objects)
end

@repeat 4 try
# Request objects
r = s3(aws, "GET", bucket; query = q)

# Add each object from the response and update our object count / marker
if haskey(r, "Contents")
l = isa(r["Contents"], Vector) ? r["Contents"] : [r["Contents"]]
for object in l
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of iterating over these objects, could we not just put! the vector into the channel? (Not sure if this is possible).

And then just do num_objects += size(l)[1]

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't the xml_dict() function still need to be applied to each element in that vector before put!-ing the vector to the channel?

put!(chnl, xml_dict(object))
num_objects += 1
end
# It's possible that the response doesn't have "Contents" and just has a prefix,
# in which case we should just save the next marker and iterate.
elseif haskey(r, "Prefix")
put!(chnl, Dict("Key" => r["Prefix"]))
num_objects += 1
end

contoken = get(r, "NextContinuationToken", "")

# Continue looping if the results were truncated and we haven't exceeded our max_items (if specified)
more = r["IsTruncated"] == "true" && (max_items === nothing || num_objects < max_items)
catch e
@delay_retry if ecode(e) in ["NoSuchBucket"] end
end
end
end
end

# Convenience method: prepend `default_aws_config()` and forward every positional and
# keyword argument unchanged.
function s3_list_objects_v2(args...; kw...)
    return s3_list_objects_v2(default_aws_config(), args...; kw...)
end

"""
s3_list_keys([::AWSConfig], bucket, [path_prefix])
Expand Down