Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(plugins): creodias_s3 search and download when no asset #1425

Merged
merged 6 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions eodag/plugins/download/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,8 @@ def _get_bucket_names_and_prefixes(
ignore_assets: Optional[bool] = False,
) -> List[Tuple[str, Optional[str]]]:
"""
retrieves the bucket names and path prefixes for the assets
Retrieves the bucket names and path prefixes for the assets

:param product: product for which the assets shall be downloaded
:param asset_filter: text for which the assets should be filtered
:param ignore_assets: if product instead of individual assets should be used
Expand Down Expand Up @@ -731,12 +732,12 @@ def _stream_download_dict(
else sanitize(product.properties.get("id", "download"))
)

if len(assets_values) == 1:
if len(assets_values) <= 1:
first_chunks_tuple = next(chunks_tuples)
# update headers
filename = os.path.basename(list(unique_product_chunks)[0].key)
headers = {"content-disposition": f"attachment; filename={filename}"}
if assets_values[0].get("type", None):
if assets_values and assets_values[0].get("type", None):
headers["content-type"] = assets_values[0]["type"]

return StreamResponse(
Expand Down Expand Up @@ -799,7 +800,6 @@ def get_chunk_parts(
common_path = self._get_commonpath(
product, unique_product_chunks, build_safe
)

for product_chunk in unique_product_chunks:
try:
chunk_rel_path = self.get_chunk_dest_path(
Expand All @@ -817,8 +817,7 @@ def get_chunk_parts(
# out of SAFE format chunk
logger.warning(e)
continue

if len(assets_values) == 1:
if len(assets_values) <= 1:
yield from get_chunk_parts(product_chunk, progress_callback)
else:
yield (
Expand Down
29 changes: 29 additions & 0 deletions eodag/plugins/download/creodias_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, Tuple

import boto3
from botocore.exceptions import ClientError

from eodag import EOProduct
from eodag.plugins.download.aws import AwsDownload
from eodag.utils.exceptions import MisconfiguredError

Expand Down Expand Up @@ -65,3 +67,30 @@ def _get_authenticated_objects_from_auth_keys(self, bucket_name, prefix, auth_di
list(objects.filter(Prefix=prefix).limit(1))
self.s3_session = s3_session
return objects

def _get_bucket_names_and_prefixes(
    self,
    product: EOProduct,
    asset_filter: Optional[str] = None,
    ignore_assets: Optional[bool] = False,
) -> List[Tuple[str, Optional[str]]]:
    """
    Build the (bucket name, prefix) pairs to download for a product

    When the product has assets and they are not ignored, the parent class
    implementation is used. Otherwise a single pair is resolved from the
    product's ``productIdentifier`` property.

    :param product: product for which the assets shall be downloaded
    :param asset_filter: text for which the assets should be filtered
    :param ignore_assets: if product instead of individual assets should be used
    :return: tuples of bucket names and prefixes
    """
    use_assets = len(product.assets) > 0 and not ignore_assets
    if not use_assets:
        # no usable assets: fall back to productIdentifier to get the S3 path
        # NOTE(review): presumably productIdentifier starts with "/" so that
        # the concatenation yields an "s3://..." URL - confirm
        product_s3_url = "s3:/" + product.properties["productIdentifier"]
        return [self.get_product_bucket_name_and_prefix(product, product_s3_url)]

    # assets are defined: let the parent class resolve them
    return super()._get_bucket_names_and_prefixes(product, asset_filter, ignore_assets)
43 changes: 24 additions & 19 deletions eodag/plugins/search/creodias_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def patched_register_downloader(self, downloader, authenticator):


def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
product.assets = {}
product.assets = AssetsDict(product)
prefix = (
product.properties.get("productIdentifier", None).replace("/eodata/", "") + "/"
)
Expand All @@ -82,27 +82,32 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
)
logger.debug("Listing assets in %s", prefix)
product.assets = AssetsDict(product)
for asset in auth.s3_client.list_objects(
s3_res = auth.s3_client.list_objects(
Bucket=config.s3_bucket, Prefix=prefix, MaxKeys=300
)["Contents"]:
asset_basename = (
asset["Key"].split("/")[-1] if "/" in asset["Key"] else asset["Key"]
)

if len(asset_basename) > 0 and asset_basename not in product.assets:
role = (
"data"
if asset_basename.split(".")[-1] in DATA_EXTENSIONS
else "metadata"
)
# check if product path has assets or is already a file
if "Contents" in s3_res:
for asset in s3_res["Contents"]:
asset_basename = (
asset["Key"].split("/")[-1]
if "/" in asset["Key"]
else asset["Key"]
)

product.assets[asset_basename] = {
"title": asset_basename,
"roles": [role],
"href": f"s3://{config.s3_bucket}/{asset['Key']}",
}
if mime_type := guess_file_type(asset["Key"]):
product.assets[asset_basename]["type"] = mime_type
if len(asset_basename) > 0 and asset_basename not in product.assets:
role = (
"data"
if asset_basename.split(".")[-1] in DATA_EXTENSIONS
else "metadata"
)

product.assets[asset_basename] = {
"title": asset_basename,
"roles": [role],
"href": f"s3://{config.s3_bucket}/{asset['Key']}",
}
if mime_type := guess_file_type(asset["Key"]):
product.assets[asset_basename]["type"] = mime_type
# update driver
product.driver = product.get_driver()

Expand Down
52 changes: 52 additions & 0 deletions tests/units/test_download_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -2252,3 +2252,55 @@ def test_plugins_download_creodias_s3(
self.assertEqual(mock_finalize_s2_safe_product.call_count, 0)
self.assertEqual(mock_check_manifest_file_list.call_count, 0)
self.assertEqual(mock_flatten_top_directories.call_count, 1)

@mock.patch("eodag.plugins.download.aws.flatten_top_directories", autospec=True)
@mock.patch(
    "eodag.plugins.download.aws.AwsDownload.check_manifest_file_list", autospec=True
)
@mock.patch(
    "eodag.plugins.download.aws.AwsDownload.finalize_s2_safe_product", autospec=True
)
@mock.patch(
    "eodag.plugins.download.aws.AwsDownload.get_chunk_dest_path", autospec=True
)
@mock.patch(
    "eodag.plugins.download.creodias_s3.CreodiasS3Download._get_authenticated_objects_from_auth_keys",
    autospec=True,
)
@mock.patch("eodag.plugins.download.aws.requests.get", autospec=True)
def test_plugins_download_creodias_s3_without_assets(
    self,
    mock_requests_get,
    mock_get_authenticated_objects,
    mock_get_chunk_dest_path,
    mock_finalize_s2_safe_product,
    mock_check_manifest_file_list,
    mock_flatten_top_directories,
):
    """Download of a creodias_s3 product that has no assets must work.

    The product only carries a ``productIdentifier`` pointing at a single
    file (``/eodata/01/a.tar``); no assets are set on it before download.
    """
    # product built without any asset: only productIdentifier gives the S3 path
    product = EOProduct(
        "creodias_s3",
        dict(
            geometry="POINT (0 0)",
            title="dummy_product",
            id="dummy",
            productIdentifier="/eodata/01/a.tar",
        ),
    )
    product.location = product.remote_location = "a"
    plugin = self.get_download_plugin(product)
    product.properties["tileInfo"] = "http://example.com/tileInfo.json"
    # authenticated objects mock
    mock_get_authenticated_objects.return_value.keys.return_value = ["a.tar"]
    # filter() returns one empty (size=0) object whose key echoes the requested Prefix
    mock_get_authenticated_objects.return_value.filter.side_effect = (
        lambda *x, **y: [mock.Mock(size=0, key=y["Prefix"])]
    )

    plugin.download(product, output_dir=self.output_dir, auth={})

    # NOTE(review): bucket "eodata" and prefix "01" are presumably derived from
    # productIdentifier "/eodata/01/a.tar" - confirm against
    # get_product_bucket_name_and_prefix
    mock_get_authenticated_objects.assert_called_once_with(
        plugin, "eodata", "01", {}
    )
    # a single chunk is expected, and none of the SAFE-specific steps are run
    self.assertEqual(mock_get_chunk_dest_path.call_count, 1)
    self.assertEqual(mock_finalize_s2_safe_product.call_count, 0)
    self.assertEqual(mock_check_manifest_file_list.call_count, 0)
    self.assertEqual(mock_flatten_top_directories.call_count, 1)
13 changes: 13 additions & 0 deletions tests/units/test_search_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from requests import RequestException
from typing_extensions import get_args

from eodag.api.product import AssetsDict
from eodag.api.product.metadata_mapping import get_queryable_from_provider
from eodag.utils import deepcopy
from eodag.utils.exceptions import UnsupportedProductType
Expand Down Expand Up @@ -2178,10 +2179,22 @@ def test_plugins_search_creodias_s3_links(self, mock_request):
}
product.register_downloader(download_plugin, auth_plugin)
assets = res[0][0].assets
self.assertEqual(3, len(assets))
# check if s3 links have been created correctly
for asset in assets.values():
self.assertIn("s3://eodata/Sentinel-1/SAR/GRD/2014/10/10", asset["href"])

# no error should occur and assets should be empty if list_objects does not have content
# (this situation will occur if the product does not have assets but is a tar file)
stubber.add_response("list_objects", {})
download_plugin = self.plugins_manager.get_download_plugin(res[0][0])
auth_plugin = self.plugins_manager.get_auth_plugin(download_plugin, res[0][0])
res[0][0].driver = None
res[0][0].assets = AssetsDict(res[0][0])
res[0][0].register_downloader(download_plugin, auth_plugin)
self.assertIsNotNone(res[0][0].driver)
self.assertEqual(0, len(res[0][0].assets))

@mock.patch(
"eodag.plugins.search.qssearch.QueryStringSearch._request", autospec=True
)
Expand Down
Loading