Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more supported override keys #129

Merged
merged 3 commits on Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions cads_catalogue/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An
-------
dict: dictionary of metadata collected
"""
metadata = dict()
metadata: dict[str, Any] = dict()
metadata_file_path = os.path.join(folder_path, "metadata.json")
if not os.path.isfile(metadata_file_path):
# some fields are required
Expand Down Expand Up @@ -297,7 +297,9 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An
"ds_responsible_organisation_role"
)
end_date = data.get("end_date")
if end_date != "now":
if end_date == "now":
metadata["end_date"] = None
else:
metadata["end_date"] = end_date
metadata["file_format"] = data.get("file_format")
metadata["format_version"] = data.get("format_version")
Expand Down Expand Up @@ -372,6 +374,8 @@ def parse_override_md(override_path: str | pathlib.Path | None) -> dict[str, Any
dict: dictionary of metadata extracted
"""
ret_value: dict[str, Any] = dict()

# base extraction and validation
if not override_path:
return ret_value
if not os.path.exists(override_path):
Expand All @@ -380,30 +384,74 @@ def parse_override_md(override_path: str | pathlib.Path | None) -> dict[str, Any
logger.warning(f"detected override file {override_path}")
with open(override_path) as fp:
try:
data = yaml.safe_load(fp)
data = yaml.load(fp.read(), Loader=yaml.loader.BaseLoader)
except Exception: # noqa
logger.exception(f"override file {override_path} is not a valid YAML")
return ret_value
if data is None:
logger.warning(f"override file {override_path} is empty")
return ret_value
if not isinstance(data, dict):
logger.error(
f"override file {override_path} has a wrong format and cannot be parsed"
)
return ret_value

# normalization
supported_keys_str = (
"abstract",
"begin_date",
"contactemail",
"disabled_reason",
"doi",
"ds_contactemail",
"ds_responsible_organisation",
"ds_responsible_organisation_role",
"format_version",
"high_priority_terms",
"lineage",
"portal",
"publication_date",
"responsible_organisation",
"responsible_organisation_role",
"responsible_organisation_website",
"title",
"topic",
"unit_measure",
"use_limitation",
)
supported_keys_bool = (
"api_enforce_constraints",
"qa_flag",
"hidden",
)
supported_keys_int = ("popularity",)
supported_keys_floats = ("representative_fraction",)
for dataset_uid in data:
ret_value[dataset_uid] = dict()
dataset_md = data[dataset_uid]
if not dataset_md:
continue
for key, value in dataset_md.items():
if key in ("qa_flag", "disabled_reason", "portal"):
ret_value[dataset_uid][key] = value
elif key == "hidden":
if value == "null":
ret_value[dataset_uid][key] = None
continue
if key in supported_keys_bool:
if isinstance(value, bool):
ret_value[dataset_uid][key] = value # type: ignore
else:
ret_value[dataset_uid][key]: bool = utils.str2bool(value) # type: ignore
elif key in supported_keys_str:
ret_value[dataset_uid][key] = value
elif key in supported_keys_int:
ret_value[dataset_uid][key] = int(value)
elif key in supported_keys_floats:
ret_value[dataset_uid][key] = float(value)
else:
logger.warning(
f"unknown key '{key}' found in override file for {dataset_uid}. It will be ignored"
)
continue
return ret_value


Expand Down
42 changes: 21 additions & 21 deletions tests/data/dumped_resources7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1486,8 +1486,8 @@
{
"resource_id": 1,
"resource_uid": "reanalysis-era5-land",
"popularity": 500,
"api_enforce_constraints": true,
"popularity": 200,
"api_enforce_constraints": false,
"constraints": "an url",
"form": "an url",
"layout": "an url",
Expand All @@ -1502,43 +1502,43 @@
"bboxS": -89,
"bboxW": 0
},
"begin_date": "1950-01-01",
"begin_date": "1960-11-02",
"end_date": "2023-02-11",
"publication_date": "2019-07-12",
"publication_date": "2022-06-01",
"record_update": "2023-12-11 08:50:52.748454+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"abstract": "an abstract",
"citation": null,
"contactemail": "https://support.ecmwf.int",
"contactemail": "an@email",
"description": [],
"disabled_reason": "A reason",
"documentation": [],
"doi": "10.24381/cds.e2161bac",
"ds_contactemail": "https://support.ecmwf.int",
"ds_responsible_organisation": "ECMWF",
"ds_responsible_organisation_role": "publisher",
"doi": "20.24381/cds.22161bac",
"ds_contactemail": "https://support.ecmwf.com",
"ds_responsible_organisation": "ORG1",
"ds_responsible_organisation_role": null,
"file_format": "{grib,netcdf}",
"format_version": null,
"hidden": true,
"high_priority_terms": "reanalysis ERA5 land",
"fts": "'era5':2 'land':3 'reanalysi':1",
"lineage": "EC Copernicus program",
"representative_fraction": 0.25,
"responsible_organisation": "ECMWF",
"responsible_organisation_role": "pointOfContact",
"responsible_organisation_website": "https://www.ecmwf.int/",
"high_priority_terms": "ERA5 reanalysis temperature",
"fts": "'era5':1 'reanalysi':2 'temperatur':3",
"lineage": "Copernicus Atmospheric Monitoring Service",
"representative_fraction": 0.5,
"responsible_organisation": "Org2",
"responsible_organisation_role": "pointOfContact2",
"responsible_organisation_website": "http://a/website.com",
"portal": "c3s2",
"qa_flag": false,
"qos_tags": [
"tag1",
"tag2",
"tag3"
],
"title": "ERA5-Land hourly data from 1950 to present",
"topic": "climatologyMeteorologyAtmosphere",
"title": "a title",
"topic": "a topic",
"type": "dataset",
"unit_measure": "dd",
"use_limitation": "Content accessible through the CDS may only be used under the terms of the licenses attributed to each particular resource.",
"unit_measure": "degree",
"use_limitation": "Content accessible through the CDS",
"variables": [],
"fulltext": null,
"search_field": "'1950':7A 'accur':88B 'across':61B 'back':83B 'climat':52B,92B 'combin':55B 'compar':34B 'complet':67B 'compon':47B 'consist':19B,69B 'data':5A,57B,78B 'dataset':16B,70B 'decad':29B,82B 'descript':89B 'ecmwf':50B 'enhanc':32B 'era5':2A,11B,36B,38B,51B 'era5-land':1A,10B,37B 'evolut':23B 'global':66B 'goe':80B 'hour':4A 'land':3A,12B,25B,39B,46B 'law':73B 'model':56B 'observ':59B 'past':95B 'physic':75B 'present':9A 'produc':42B,77B 'provid':17B,86B 'reanalysi':15B,53B,54B,76B 'replay':44B 'resolut':33B 'sever':28B,81B 'time':85B 'use':71B 'variabl':26B 'view':20B 'world':63B"
Expand Down
21 changes: 21 additions & 0 deletions tests/data/override2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,27 @@ reanalysis-era5-land:
hidden: True
disabled_reason: A reason
portal: c3s2
abstract: an abstract
begin_date: 1960-11-02
contactemail: an@email
doi: 20.24381/cds.22161bac
ds_contactemail: https://support.ecmwf.com
ds_responsible_organisation: ORG1
ds_responsible_organisation_role: null
format_version: null
high_priority_terms: ERA5 reanalysis temperature
lineage: Copernicus Atmospheric Monitoring Service
popularity: 200
publication_date: 2022-06-01
representative_fraction: 0.5
responsible_organisation: Org2
responsible_organisation_role: pointOfContact2
responsible_organisation_website: http://a/website.com
title: a title
topic: a topic
unit_measure: degree
use_limitation: Content accessible through the CDS
api_enforce_constraints: False
reanalysis-era5-pressure-levels:
reanalysis-era5-single-levels:
hidden: False
Expand Down
27 changes: 24 additions & 3 deletions tests/test_40_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,34 @@ def test_parse_override_md() -> None:
# consistent override info
overrides_path = os.path.join(TESTDATA_PATH, "override2.yaml")
expected = {
"reanalysis-era5-pressure-levels": {},
"reanalysis-era5-land": {
"disabled_reason": "A reason",
"qa_flag": False,
"hidden": True,
"disabled_reason": "A reason",
"portal": "c3s2",
"qa_flag": False,
"abstract": "an abstract",
"begin_date": "1960-11-02",
"contactemail": "an@email",
"doi": "20.24381/cds.22161bac",
"ds_contactemail": "https://support.ecmwf.com",
"ds_responsible_organisation": "ORG1",
"ds_responsible_organisation_role": None,
"format_version": None,
"high_priority_terms": "ERA5 reanalysis temperature",
"lineage": "Copernicus Atmospheric Monitoring Service",
"popularity": 200,
"publication_date": "2022-06-01",
"representative_fraction": 0.5,
"responsible_organisation": "Org2",
"responsible_organisation_role": "pointOfContact2",
"responsible_organisation_website": "http://a/website.com",
"title": "a title",
"topic": "a topic",
"unit_measure": "degree",
"use_limitation": "Content accessible through the CDS",
"api_enforce_constraints": False,
},
"reanalysis-era5-pressure-levels": {},
"reanalysis-era5-single-levels": {
"hidden": False,
"portal": "ads",
Expand Down