Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: drivers from eodag-cube to core #1488

Draft
wants to merge 8 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions eodag/api/product/_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
except ImportError:
from eodag.api.product._assets import AssetsDict

from eodag.api.product.drivers import DRIVERS, NoDriver
from eodag.api.product.drivers import DRIVERS, LEGACY_DRIVERS, NoDriver
from eodag.api.product.metadata_mapping import (
DEFAULT_GEOMETRY,
NOT_AVAILABLE,
Expand Down Expand Up @@ -498,11 +498,16 @@ def get_driver(self) -> DatasetDriver:
try:
for driver_conf in DRIVERS:
if all([criteria(self) for criteria in driver_conf["criteria"]]):
return driver_conf["driver"]
driver = driver_conf["driver"]
break
# use legacy driver for deprecated get_data method usage
for driver_conf in LEGACY_DRIVERS:
if all([criteria(self) for criteria in driver_conf["criteria"]]):
driver.legacy = driver_conf["driver"]
break
return driver
except TypeError:
logger.warning(
"Drivers definition seems out-of-date, please update eodag-cube"
)
logger.info("No driver matching")
pass
return NoDriver()

Expand Down
71 changes: 64 additions & 7 deletions eodag/api/product/drivers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,71 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""EODAG drivers package"""
from __future__ import annotations

from typing import Callable, TypedDict

from eodag.api.product.drivers.base import DatasetDriver, NoDriver
from eodag.api.product.drivers.generic import GenericDriver
from eodag.api.product.drivers.sentinel1 import Sentinel1Driver
from eodag.api.product.drivers.sentinel2 import Sentinel2Driver
from eodag.api.product.drivers.stac_assets import StacAssets

# Shape of one entry of the driver selection tables defined in this module:
#   - "criteria": callables returning a bool; they are called with the product
#     and must all return a truthy value for the entry to be selected
#   - "driver": the driver instance associated with these criteria
DriverCriteria = TypedDict(
    "DriverCriteria",
    {
        "criteria": list[Callable[..., bool]],
        "driver": DatasetDriver,
    },
)

# Driver selection table. Entries are scanned in order and the first one whose
# criteria all hold for the product provides the driver, so the catch-all
# generic entry must stay last.
DRIVERS: list[DriverCriteria] = [
    {
        # product types starting with "S2_MSI_" (a missing product type is
        # treated as an empty string)
        "criteria": [
            lambda prod: (prod.product_type or "").startswith("S2_MSI_")
        ],
        "driver": Sentinel2Driver(),
    },
    {
        # product types starting with "S1_SAR_"
        "criteria": [
            lambda prod: (prod.product_type or "").startswith("S1_SAR_")
        ],
        "driver": Sentinel1Driver(),
    },
    {
        # fallback: matches every product
        "criteria": [lambda prod: True],
        "driver": GenericDriver(),
    },
]

try:
from eodag_cube.api.product.drivers import ( # pyright: ignore[reportMissingImports]
DRIVERS,
)
except ImportError:
DRIVERS = []
# Legacy driver selection table, used for the deprecated get_data method.
# Entries are scanned in order and the first one whose criteria all hold for
# the product is used, so the catch-all generic entry must stay last.
LEGACY_DRIVERS: list[DriverCriteria] = [
    {
        # product exposing a non-empty "assets" attribute
        "criteria": [lambda prod: len(getattr(prod, "assets", {})) > 0],
        "driver": StacAssets(),
    },
    {
        # product whose properties contain an "assets" entry
        "criteria": [lambda prod: "assets" in prod.properties],
        "driver": StacAssets(),
    },
    {
        # exact product type match
        "criteria": [lambda prod: prod.product_type == "S2_MSI_L1C"],
        "driver": Sentinel2Driver(),
    },
    {
        # fallback: matches every product
        "criteria": [lambda prod: True],
        "driver": GenericDriver(),
    },
]

# exportable content
__all__ = ["DRIVERS", "DatasetDriver", "NoDriver"]
# Public names of the drivers package. LEGACY_DRIVERS is listed because it is
# imported from this package by eodag.api.product._product, and Sentinel1Driver
# because it is exposed here the same way as Sentinel2Driver.
__all__ = [
    "DRIVERS",
    "DatasetDriver",
    "GenericDriver",
    "LEGACY_DRIVERS",
    "NoDriver",
    "Sentinel1Driver",
    "Sentinel2Driver",
]
52 changes: 51 additions & 1 deletion eodag/api/product/drivers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,61 @@
# limitations under the License.
from __future__ import annotations

from typing import TYPE_CHECKING
import logging
import re
from typing import TYPE_CHECKING, Optional, TypedDict

if TYPE_CHECKING:
from eodag.api.product import EOProduct


class AssetPatterns(TypedDict):
    """Asset patterns definition.

    Associates a compiled regular expression, matched against an asset href,
    with the roles returned for the assets it matches.
    """

    # compiled regex matched against the asset href; its captured groups are
    # joined to build the asset key
    pattern: re.Pattern
    # roles returned for assets whose href matches ``pattern``
    roles: list[str]


logger = logging.getLogger("eodag.driver.base")


class DatasetDriver(metaclass=type):
"""Dataset driver"""

#: legacy driver for deprecated get_data method usage
legacy: DatasetDriver

ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = []

STRIP_SPECIAL_PATTERN = re.compile(r"^[^A-Z0-9]+|[^A-Z0-9]+$", re.IGNORECASE)

def _normalize_key(self, key, eo_product):
# default cleanup
norm_key = key.replace(eo_product.properties.get("id", ""), "")
norm_key = re.sub(self.STRIP_SPECIAL_PATTERN, "", norm_key)

return norm_key

def guess_asset_key_and_roles(
self, href: str, eo_product: EOProduct
) -> tuple[Optional[str], Optional[list[str]]]:
"""Guess the asset key and roles from the given href.

:param href: The asset href
:param eo_product: The product to which the asset belongs
:returns: The asset key and roles
"""
for pattern_dict in self.ASSET_KEYS_PATTERNS_ROLES:
if matched := pattern_dict["pattern"].match(href):
extracted_key, roles = (
"".join([m for m in matched.groups() if m is not None]),
pattern_dict.get("roles"),
)
normalized_key = self._normalize_key(extracted_key, eo_product)
return normalized_key or extracted_key, roles
logger.debug(f"No key & roles could be guessed for {href}")
return None, None

def get_data_address(self, eo_product: EOProduct, band: str) -> str:
"""Retrieve the address of the dataset represented by `eo_product`.

Expand All @@ -35,6 +81,10 @@ def get_data_address(self, eo_product: EOProduct, band: str) -> str:
:raises: :class:`~eodag.utils.exceptions.AddressNotFound`
:raises: :class:`~eodag.utils.exceptions.UnsupportedDatasetAddressScheme`
"""
# legacy driver usage if defined
if legacy_driver := getattr(self, "legacy", None):
return legacy_driver.get_data_address(eo_product, band)

raise NotImplementedError


Expand Down
96 changes: 96 additions & 0 deletions eodag/api/product/drivers/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
# Copyright 2021, CS GROUP - France, http://www.c-s.fr
#
# This file is part of EODAG project
# https://www.github.com/CS-SI/EODAG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import logging
import re
from typing import TYPE_CHECKING

from eodag.api.product.drivers.base import AssetPatterns, DatasetDriver
from eodag.utils.exceptions import AddressNotFound

if TYPE_CHECKING:
from eodag.api.product._product import EOProduct

logger = logging.getLogger("eodag.driver.generic")


# File extensions to accept on top of those known to rasterio/GDAL
EXTRA_ALLOWED_FILE_EXTENSIONS = [".grib", ".grib2"]


class GenericDriver(DatasetDriver):
    """Generic Driver for products that need to be downloaded"""

    # Patterns are tried in order: the more specific ones come first and the
    # last one accepts any file name.
    ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = [
        # metadata
        {
            "roles": ["metadata"],
            "pattern": re.compile(
                r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)$", re.IGNORECASE
            ),
        },
        # thumbnail
        {
            "roles": ["thumbnail"],
            "pattern": re.compile(
                r"^(?:.*[/\\])?(thumbnail)(\.jpg|\.jpeg|\.png)$", re.IGNORECASE
            ),
        },
        # quicklook
        {
            "roles": ["overview"],
            "pattern": re.compile(
                r"^(?:.*[/\\])?([^/\\]+-ql|preview|quick-?look)(\.jpg|\.jpeg|\.png)$",
                re.IGNORECASE,
            ),
        },
        # default
        {
            "roles": ["auxiliary"],
            "pattern": re.compile(r"^(?:.*[/\\])?([^/\\]+)$"),
        },
    ]

    def _get_data_address(self, eo_product: EOProduct, band: str) -> str:
        """Fallback implementation of ``get_data_address()``.

        Used as ``get_data_address()`` when ``eodag-cube`` cannot be imported.
        If a legacy driver is set on this driver, the call is delegated to it;
        otherwise the feature is reported as unavailable.

        :param eo_product: The product whose underlying dataset address is to
                           be retrieved
        :param band: The band to retrieve (e.g: 'B01')
        :returns: An address for the dataset
        :raises: :class:`~eodag.utils.exceptions.AddressNotFound`
        :raises: :class:`~eodag.utils.exceptions.UnsupportedDatasetAddressScheme`
        """
        # legacy driver usage if defined
        fallback_driver = getattr(self, "legacy", None)
        if not fallback_driver:
            raise AddressNotFound("eodag-cube required for this feature")

        return fallback_driver.get_data_address(eo_product, band)

    # Bind get_data_address at class-creation time: the eodag-cube
    # implementation when it is installed, the local fallback otherwise.
    try:
        from eodag_cube.api.product.drivers.generic import (  # pyright: ignore[reportMissingImports] ; isort: skip
            GenericDriver as GenericDriver_cube,
        )
    except ImportError:
        get_data_address = _get_data_address
    else:
        get_data_address = GenericDriver_cube.get_data_address
91 changes: 91 additions & 0 deletions eodag/api/product/drivers/sentinel1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# Copyright 2021, CS GROUP - France, http://www.c-s.fr
#
# This file is part of EODAG project
# https://www.github.com/CS-SI/EODAG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import re
from typing import TYPE_CHECKING

from eodag.api.product.drivers.base import AssetPatterns, DatasetDriver

if TYPE_CHECKING:
from eodag.api.product._product import EOProduct


class Sentinel1Driver(DatasetDriver):
    """Driver for Sentinel1 products"""

    # a key made only of two "v"/"h" letters (any case), e.g. "vv" or "VH"
    DATA_PATTERN = re.compile(r"[vh]{2}", re.IGNORECASE)

    # (pattern, replacement) pairs applied in order to non-data keys: the
    # first ones drop known tokens, the last ones collapse the separators
    # left behind.
    # NOTE(review): r"s1a?" only matches "s1"/"s1a"; names starting with
    # another unit letter would keep that letter - confirm this is intended.
    REPLACE_PATTERNS = [
        (re.compile(r"s1a?", re.IGNORECASE), ""),
        (re.compile(r"grd", re.IGNORECASE), ""),
        (re.compile(r"slc", re.IGNORECASE), ""),
        (re.compile(r"iw", re.IGNORECASE), ""),
        (re.compile(r"ocn", re.IGNORECASE), ""),
        (re.compile(r"raw([-_]s)?", re.IGNORECASE), ""),
        (re.compile(r"[t?0-9]{3,}", re.IGNORECASE), ""),
        (re.compile(r"-+"), "-"),
        (re.compile(r"-+\."), "."),
        (re.compile(r"_+"), "_"),
        (re.compile(r"_+\."), "."),
    ]

    # Patterns are tried in order: the more specific ones come first and the
    # last one accepts any file name.
    ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = [
        # data
        {
            "roles": ["data"],
            "pattern": re.compile(
                r"^.*?([vh]{2}).*\.(?:jp2|tiff?|dat)$", re.IGNORECASE
            ),
        },
        # metadata
        {
            "roles": ["metadata"],
            "pattern": re.compile(
                r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)$", re.IGNORECASE
            ),
        },
        # thumbnail
        {
            "roles": ["thumbnail"],
            "pattern": re.compile(
                r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)$", re.IGNORECASE
            ),
        },
        # quicklook
        {
            "roles": ["overview"],
            "pattern": re.compile(
                r"^(?:.*[/\\])?([^/\\]+-ql|preview|quick-?look)(\.jpe?g|\.png)$",
                re.IGNORECASE,
            ),
        },
        # default
        {
            "roles": ["auxiliary"],
            "pattern": re.compile(r"^(?:.*[/\\])?([^/\\]+)$"),
        },
    ]

    def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
        """Normalize an asset key extracted from a Sentinel1 asset href.

        A key that is exactly two "v"/"h" letters is returned upper-cased.
        Any other key goes through the parent cleanup, then through
        ``REPLACE_PATTERNS``, then through the parent cleanup once more.

        :param key: The raw key to normalize
        :param eo_product: The product to which the asset belongs
        :returns: The normalized key
        """
        if self.DATA_PATTERN.fullmatch(key) is not None:
            return key.upper()

        cleaned = super()._normalize_key(key, eo_product)
        for regex, substitute in self.REPLACE_PATTERNS:
            cleaned = regex.sub(substitute, cleaned)

        # the replacements may leave separators at either end: clean up again
        return super()._normalize_key(cleaned, eo_product)
Loading
Loading