Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

global attributes #23

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 190 additions & 0 deletions src/pymorize/global_attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# global_attributes.py

import json
import logging
import re
from pathlib import Path
# from loguru import logger

# TODO: these names do not have to be hard-coded; they can be read directly
# from cmip6-cmor-tables/Tables/CMIP6_CV.json.
# Global attribute names, kept in case-insensitive alphabetical order.
_fields = (
    "activity_id",
    "Conventions",
    "creation_date",
    "data_specs_version",
    "experiment",
    "experiment_id",
    "forcing_index",
    "frequency",
    "further_info_url",
    "grid",
    "grid_label",
    "initialization_index",
    "institution",
    "institution_id",
    "license",
    "mip_era",
    "nominal_resolution",
    "physics_index",
    "product",
    "realization_index",
    "realm",
    "source",
    "source_id",
    "source_type",
    "sub_experiment",
    "sub_experiment_id",
    "table_id",
    "tracking_id",
    "variable_id",
    "variant_label",
)

# Global attributes that describe the parent experiment a run branched from.
# "parent_activity_id" was missing from this list although it belongs to the
# same group of parent attributes.
_parent_fields = (
    "branch_method",
    "branch_time_in_child",
    "branch_time_in_parent",
    "parent_activity_id",
    "parent_experiment_id",
    "parent_mip_era",
    "parent_source_id",
    "parent_time_units",
    "parent_variant_label",
)


# Fallback values for settings the caller leaves unset.
defaults = {
    "institution_id": "AWI",
    # Must be one of the keys under "license_options" in the license CV.
    "license_type": "CC BY-SA 4.0",
    # Presumably the optional modelling-group URL mentioned in the license
    # text; None means no URL is configured -- TODO confirm.
    "maintainer_url": None,
}


def set_global_attributes(ds, rule):
    """Resolve global-attribute values from the settings stored in ``rule``.

    Work in progress: the variant label is parsed into a local attribute
    dict and the activity / experiment information is looked up in the
    controlled vocabularies, but nothing is written to ``ds`` yet and the
    function returns ``None``.

    Args:
        ds: Dataset the attributes are meant for (currently unused).
        rule: Object exposing ``get(key, default)``. The keys read are
            ``cvs``, ``variant_label``, ``source_id``, ``experiment_id``
            and ``activity_id``.

    Raises:
        ValueError: If ``variant_label`` is missing or malformed, or if the
            rule gives no ``activity_id`` and the controlled vocabulary
            lists more than one for the experiment.
    """
    gattrs = {}
    cvs = rule.get("cvs", {})
    variant_label = rule.get("variant_label")
    # The helper defined in this module is `_update_variant_label`; the
    # un-prefixed name does not exist and raised NameError.
    _update_variant_label(variant_label, gattrs)
    source_id = rule.get("source_id")
    experiment_id = rule.get("experiment_id")
    # Looked up unconditionally: the entry is needed further down even when
    # the rule supplies `activity_id` itself.
    _experiment_id_cv = cvs.get("experiment_id", {}).get(experiment_id, {})
    activity_id = rule.get("activity_id", None)
    if activity_id is None:
        activity_id = _experiment_id_cv.get("activity_id", [])
        if activity_id and len(activity_id) > 1:
            activity_ids = ", ".join(activity_id)
            raise ValueError(
                f"activity_id -- {activity_ids} -- has multiple value for experiment_id {experiment_id}."
            )

    # TODO: the values below (and `source_id`, `activity_id`, `gattrs`) are
    # resolved but not yet applied to `ds`.
    experiment = _experiment_id_cv.get("experiment", "")
    parent_activity_id = _experiment_id_cv.get("parent_activity_id", "")


def update_global_attributes(ds, rule):
    """Collect the table-header attributes for the variable of ``rule``.

    Reads the ``"Header"`` entry of ``rule.data_request_variable.table._data``
    and copies five of its entries into a local dict. The dict is currently
    neither returned nor applied to ``ds``; the function returns ``None``.

    Raises:
        KeyError: If the header lacks one of the copied entries.
    """
    header = rule.data_request_variable.table._data["Header"]
    header_keys = (
        "data_specs_version",
        "Conventions",
        "mip_era",
        "realm",
        "product",
    )
    attrs = {key: header[key] for key in header_keys}


# Compiled once at import time. Used with `fullmatch`, so the whole label
# has to match and no explicit end anchor is needed.
_VARIANT_LABEL_PATTERN = re.compile(
    r"r(?P<realization_index>\d+)"
    r"i(?P<initialization_index>\d+)"
    r"p(?P<physics_index>\d+)"
    r"f(?P<forcing_index>\d+)"
)


def _parse_variant_label(label: str) -> dict:
    """Extract the index values from a variant label.

    `label` must be of the form "r<int>i<int>p<int>f<int>".
    Example: "r1i1p1f1"

    Args:
        label: The variant label to parse.

    Returns:
        A dict with the integer values of ``realization_index``,
        ``initialization_index``, ``physics_index`` and ``forcing_index``.

    Raises:
        ValueError: If `label` is ``None`` or does not have the expected
            form. A trailing newline is rejected as well.
    """
    # `match` combined with a trailing `$` would also accept "r1i1p1f1\n",
    # because `$` matches just before a final newline; `fullmatch` does not.
    match = None if label is None else _VARIANT_LABEL_PATTERN.fullmatch(label)
    if match is None:
        raise ValueError(
            f"`label` must be of the form 'r<int>i<int>p<int>f<int>', Got: {label}"
        )
    return {name: int(value) for name, value in match.groupdict().items()}


def _update_variant_label(label: str, gattrs: dict) -> dict:
    """Add the variant label and its four indices to the global attributes.

    ``gattrs`` is updated in place with the indices parsed from ``label``
    and with ``label`` itself under ``"variant_label"``; the same dict is
    returned.
    """
    gattrs.update(_parse_variant_label(label))
    gattrs["variant_label"] = label
    return gattrs


def load_cvs(path: Path):
    """Load all controlled vocabularies found at ``path`` into one dict.

    Every ``*.json`` file directly inside ``path`` is parsed and its
    top-level entries are merged into a single mapping. Files are read in
    sorted order so the result is deterministic when two files define the
    same key (the later file wins). The ``version_metadata`` entry is
    dropped from the merged result.

    Args:
        path: Directory containing the JSON files (``Path`` or ``str``).

    Returns:
        The merged mapping; empty if the directory has no JSON files.
    """
    cvs = {}
    for json_file in sorted(Path(path).glob("*.json")):
        with open(json_file, encoding="utf-8") as fid:
            cvs |= json.load(fid)
    # A missing "version_metadata" entry (e.g. an empty directory) is not an
    # error, so pop with a default instead of raising KeyError.
    cvs.pop("version_metadata", None)
    return cvs


# Standard-library logger: the loguru import at the top of this file is
# commented out, so the bare name `logger` is not defined in this module.
_logger = logging.getLogger(__name__)


def update_license(
    gattrs: dict,
    cv: dict,
    institution_id: str = None,
    license_type: str = None,
    further_info_url: str = None,
):
    """
    Builds the license text and stores it in ``gattrs["license"]``.

    The template ``cv["license"]["license"]`` contains ``<...>`` placeholders
    which are filled, in order, with the institution name, the license id,
    the license url and (only when a URL is available) the maintainer URL.
    When no URL is available, every ``[...]`` section of the template is
    removed before the placeholders are filled.

    Args:
        gattrs (dict): The global attributes dictionary to update in place.
        cv (dict): The controlled vocabulary dictionary; must provide the
            ``license`` and ``institution_id`` entries.
        institution_id (str, optional): The institution ID. Falls back to
            ``defaults["institution_id"]``.
        license_type (str, optional): Key into ``license_options``. Falls
            back to ``defaults["license_type"]``.
        further_info_url (str, optional): The maintainer URL. Falls back to
            ``defaults["further_info_url"]`` and then to
            ``defaults["maintainer_url"]``.

    Returns:
        None

    Raises:
        KeyError: If ``cv`` lacks a required entry, or if ``license_type`` or
            ``institution_id`` is not present in it.
        IndexError: If the template has more placeholders than values.
    """

    institution_id = institution_id or defaults.get("institution_id")
    license_type = license_type or defaults.get("license_type")
    # `defaults` stores the URL under "maintainer_url"; "further_info_url"
    # is still consulted first so an existing override keeps working.
    further_info_url = (
        further_info_url
        or defaults.get("further_info_url")
        or defaults.get("maintainer_url")
    )
    _logger.debug("institution_id=%r", institution_id)
    _logger.debug("license_type=%r", license_type)
    _logger.debug("further_info_url=%r", further_info_url)

    lic = cv["license"]
    license_text = lic["license"]
    license_option = lic["license_options"][license_type]
    license_id = license_option["license_id"]
    license_url = license_option["license_url"]
    institution = cv["institution_id"][institution_id]

    values = [institution, license_id, license_url]
    if further_info_url is None:
        _logger.debug(
            "Removing placeholder for maintainer url from license text as it is not provided."
        )
        license_text = re.sub(r"\[.*?\]", "", license_text)
    else:
        # NOTE(review): with a URL the square brackets around the optional
        # section stay in the text verbatim -- confirm that is intended.
        values.append(further_info_url)

    _logger.debug(
        "Creating place-holders in license template found in CMIP6_license.json"
    )
    template = re.sub(r"<.*?>", "{}", license_text)
    text = template.format(*values)
    _logger.debug("License: %s", text)
    gattrs["license"] = text
86 changes: 86 additions & 0 deletions src/pymorize/global_attributes_checklist.org
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
* GLOBAL ATTRIBUTES

reference CMIP6_required_global_attributes.json

| index | NAME | IMPLEMENTED | SOURCE | EXAMPLE |
|-------+----------------------+-------------+---------------+---------------------------------------------------|
| 1 | activity_id | x | USER | CMIP |
| 2 | Conventions | x | table | CF-1.7 CMIP-6.2 |
| 3 | creation_date | | | 2018-12-18T12:00:00Z |
| 4 | data_specs_version | x | USER / table | 01.00.27 |
| 5 | experiment | | | piControl |
| 6 | experiment_id | | | piControl |
| 7 | forcing_index | x | derived from | 1 |
| | | | variant_label | |
| 8 | frequency | x | table | mon |
| 9 | further_info_url | x | USER | *too_long_to_list_here |
| | | | (optional, | |
| | | | default: | |
| | | | None) | |
| 10 | grid | | | *too_long_to_list_here |
| 11 | grid_label | | | gn |
| 12 | initialization_index | x | derived from | 1 |
| | | | variant_label | |
| 13 | institution | | | *too_long_to_list_here |
| 14 | institution_id | x | using | AWI |
| | | | default: AWI | |
| 15 | license | x | CV | *too_long_to_list_here |
| 16 | mip_era | x | table | CMIP6 |
| 17 | nominal_resolution | | | 25 km |
| 18 | physics_index | x | derived from | 1 |
| | | | variant_label | |
| 19 | product | x | table | model-output |
| 20 | realization_index | x | derived from | 1 |
| | | | variant_label | |
| 21 | realm | x | table | ocean |
| 22 | source | | | AWI-CM-1-1-MR |
| 23 | source_id | | | AWI-CM-1-1-MR |
| 24 | source_type | | | AOGCM |
| 25 | sub_experiment | | | none |
| 26 | sub_experiment_id | | | none |
| 27 | table_id | x | USER / use | Omon |
| | | | all matching | |
| | | | tables | |
| 28 | tracking_id | | | hdl:21.14100/84bfc093-b0a3-44ee-b733-91239b6fa6b2 |
| 29 | variable_id | x | USER | fgco2 |
| 30 | variant_label | x | USER | r1i1p1f1 |



EXAMPLE
- further_info_url: "https://furtherinfo.es-doc.org/CMIP6.AWI.AWI-CM-1-1-MR.piControl.none.r1i1p1f1" ;
- grid: "FESOM 1.4 (unstructured grid in the horizontal with 830305 wet nodes; 46 levels; top grid cell 0-5 m)"
- institution: "Alfred Wegener Institute, Helmholtz Centre for Polar and Marine Research, Am Handelshafen 12, 27570 Bremerhaven, Germany"
- license: "CMIP6 model data produced by Alfred Wegener Institute, Helmholtz
Centre for Polar and Marine Research, Am Handelshafen 12, 27570 Bremerhaven,
Germany is licensed under a Creative Commons Attribution-ShareAlike 4.0
International License (https://creativecommons.org/licenses/). Consult
https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6
output, including citation requirements and proper acknowledgment. Further
information about this data, including some limitations, can be found via
the further_info_url (recorded as a global attribute in this file). The data
producers and data providers make no warranty, either express or implied,
including, but not limited to, warranties of merchantability and fitness for
a particular purpose. All liabilities arising from the supply of the
information (including any liability arising in negligence) are excluded to
the fullest extent permitted by law."
- branch_method: "standard"
- branch_time_in_child: 0.
- branch_time_in_parent: 182622.
- parent_activity_id: "CMIP"
- parent_experiment_id: "piControl-spinup"
- parent_mip_era: "CMIP6"
- parent_source_id: "AWI-CM-1-1-MR"
- parent_time_units: "days since 1901-1-1"
- parent_variant_label: "r1i1p1f1"


COMMENT
4. `data_specs_version`: Currently taken from the tables, which are pinned to a specific version ("01.00.13" for instance).
Exposing this attribute to the user would mean fetching tables of the user-defined version from git.
This is currently not implemented.
27. `table_id`: [Optional] A CMOR_variable may be in more than one table.
If the user does not specify a table_id, then all matching tables for this variable
are considered.


1 change: 1 addition & 0 deletions tests/configs/fesom_sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ rules:
input_patterns:
- /path/to/fesom/output/files/*_temp.nc
output_file: temp.nc

Loading
Loading