From 721787c3a87ff255183b8a6e89d265d8be25ebf9 Mon Sep 17 00:00:00 2001 From: garciam Date: Mon, 14 Oct 2024 09:53:55 +0200 Subject: [PATCH 1/2] add record_timestamp to CDM Core --- cdsobs/cdm/lite.py | 1 + cdsobs/ingestion/readers/cuon.py | 6 +++++- tests/retrieve/test_adaptor.py | 8 +++----- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cdsobs/cdm/lite.py b/cdsobs/cdm/lite.py index 428a275..a65893d 100644 --- a/cdsobs/cdm/lite.py +++ b/cdsobs/cdm/lite.py @@ -60,6 +60,7 @@ "fg_depar@body", "an_depar@body", "fg_depar@offline", + "record_timestamp", ] # Add uncetainty numbered vars programatycally, as they are to many to add by hand to # the list diff --git a/cdsobs/ingestion/readers/cuon.py b/cdsobs/ingestion/readers/cuon.py index 3e9adb2..c233f3e 100644 --- a/cdsobs/ingestion/readers/cuon.py +++ b/cdsobs/ingestion/readers/cuon.py @@ -402,7 +402,7 @@ def get_denormalized_table_file( ) # Decode time if len(denormalized_table_file) > 0: - for time_field in ["record_timestamp", "report_timestamp"]: + for time_field in ["record_timestamp", "report_timestamp", "date_time"]: denormalized_table_file.loc[:, time_field] = cftime.num2date( denormalized_table_file.loc[:, time_field], constants.TIME_UNITS, @@ -461,6 +461,10 @@ def _fix_table_data( ).astype("bytes") # Remove missing values to save memory table_data = table_data.loc[~table_data.observation_value.isnull()] + # Fix latitude and longitude adding lond and latd so they really represent the + # measurement location + table_data["latitude"] += table_data["latd"] + table_data["longitude"] += table_data["lond"] # Remove duplicate station records if table_name == "station_configuration": table_data = table_data.drop_duplicates( diff --git a/tests/retrieve/test_adaptor.py b/tests/retrieve/test_adaptor.py index f613e10..48b7e31 100644 --- a/tests/retrieve/test_adaptor.py +++ b/tests/retrieve/test_adaptor.py @@ -23,7 +23,7 @@ def test_adaptor(tmp_path): test_adaptor_config = { "entry_point": 
"cads_adaptors:ObservationsAdaptor", "collection_id": "insitu-observations-woudc-ozone-total-column-and-profiles", - "obs_api_url": "http://obscatalogue.cads-obs.compute.cci2.ecmwf.int", + "obs_api_url": "http://localhost:8000", "mapping": { "remap": { "observation_type": { @@ -51,11 +51,9 @@ def test_adaptor_uscrn(tmp_path): test_request = { "time_aggregation": "daily", - "format": "netCDF", + "format": "csv", "variable": [ - "maximum_air_temperature", - "maximum_air_temperature_negative_total_uncertainty", - "maximum_air_temperature_positive_total_uncertainty", + "air_temperature", ], "year": ["2007"], "month": ["11"], From 2ebecfcbff46d0ea669b0c715ccf50a4ef6bf2bb Mon Sep 17 00:00:00 2001 From: garciam Date: Fri, 18 Oct 2024 16:26:56 +0200 Subject: [PATCH 2/2] save desroziers uncertainty as random (type 1) --- cdsobs/cdm/denormalize.py | 1 + cdsobs/data/cdsobs_config_template.yml | 1 + .../service_definition.yml | 10 ++++++++++ cdsobs/ingestion/readers/cuon.py | 7 +++++++ tests/retrieve/test_api.py | 9 +++------ 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/cdsobs/cdm/denormalize.py b/cdsobs/cdm/denormalize.py index 1970a33..0fea3ab 100644 --- a/cdsobs/cdm/denormalize.py +++ b/cdsobs/cdm/denormalize.py @@ -38,6 +38,7 @@ def denormalize_tables( "era5fb_table", "advanced_homogenisation", "advanced_uncertainty", + "uncertainty_table", ]: if table_name in tables_to_use: if len(dataset_cdm["observations_table"]) != len(dataset_cdm[table_name]): diff --git a/cdsobs/data/cdsobs_config_template.yml b/cdsobs/data/cdsobs_config_template.yml index ccc2bbf..e778ff6 100644 --- a/cdsobs/data/cdsobs_config_template.yml +++ b/cdsobs/data/cdsobs_config_template.yml @@ -86,6 +86,7 @@ datasets: - era5fb_table - advanced_homogenisation - advanced_uncertainty + - uncertainty_table reader: "cdsobs.ingestion.readers.cuon.read_cuon_netcdfs" reader_extra_args: input_dir: "test" diff --git
a/cdsobs/data/insitu-comprehensive-upper-air-observation-network/service_definition.yml b/cdsobs/data/insitu-comprehensive-upper-air-observation-network/service_definition.yml index 2b588c5..b5dee7e 100644 --- a/cdsobs/data/insitu-comprehensive-upper-air-observation-network/service_definition.yml +++ b/cdsobs/data/insitu-comprehensive-upper-air-observation-network/service_definition.yml @@ -155,6 +155,16 @@ sources: station_type: description: Type of station, e.g. land station, sea station etc dtype: int32 + uncertainty_value: + description: Uncertainty value + dtype: float32 + units: Defined in uncertainty_unitsN + uncertainty_type: + description: Uncertainty type + dtype: uint8 + uncertainty_units: + description: Units for uncertainty + dtype: object units: description: abbreviated name of SI units of observation value, e.g. K, m/s or J/kg dtype: object diff --git a/cdsobs/ingestion/readers/cuon.py b/cdsobs/ingestion/readers/cuon.py index c233f3e..24db2c0 100644 --- a/cdsobs/ingestion/readers/cuon.py +++ b/cdsobs/ingestion/readers/cuon.py @@ -504,6 +504,13 @@ def _fix_table_data( table_data = table_data.set_index(obs_id_name).rename( {"index": f"index|{table_name}"}, axis=1 ) + # Rename uncertainty + if table_name == "advanced_uncertainty": + table_data = table_data.rename(dict(desroziers_30="uncertainty_value"), axis=1) + table_data["uncertainty_type"] = 1 + table_data.loc[:, "uncertainty_units"] = dataset_cdm["observations_table"][ + "units" + ].values if "level_0" in table_data: table_data = table_data.drop("level_0", axis=1) primary_keys_are_unique = ( diff --git a/tests/retrieve/test_api.py b/tests/retrieve/test_api.py index 16fb285..a269c0a 100644 --- a/tests/retrieve/test_api.py +++ b/tests/retrieve/test_api.py @@ -5,8 +5,6 @@ import pytest import xarray -from cdsobs.config import CDSObsConfig -from cdsobs.constants import CONFIG_YML from cdsobs.retrieve.api import retrieve_observations from cdsobs.retrieve.models import RetrieveArgs from cdsobs.storage 
import S3Client @@ -81,12 +79,11 @@ def test_retrieve( @pytest.mark.skip("Too slow") -def test_retrieve_cuon(): - test_config = CDSObsConfig.from_yaml(CONFIG_YML) +def test_retrieve_cuon(test_repository, test_config): dataset_name = "insitu-comprehensive-upper-air-observation-network" params = { "dataset_source": "CUON", - "time_coverage": ["2000-01-01 00:00:00", "2000-12-31 00:00:00"], + "time_coverage": ["1960-01-01 00:00:00", "1960-02-28 00:00:00"], "variables": [ "aerosol_absorption_optical_depth", "air_temperature", @@ -103,7 +100,7 @@ def test_retrieve_cuon(): Path("/tmp"), size_limit=1000000000000, ) - print(output_file) + (print(output_file) @ pytest.mark.skip("Too slow")) def test_retrieve_gruan(test_repository, test_config):