Commit

skip unneeded variables
As agreed with UNIVIE. Also use longitudes and latitudes from header_table instead of from station_configuration.
garciampred committed Aug 14, 2024
1 parent 60afd48 commit 99c68c8
Showing 3 changed files with 21 additions and 17 deletions.
@@ -199,5 +199,5 @@ sources:
     - desroziers_30_uncertainy
   group_name: uncertainty
   space_columns:
-    y: latitude|station_configuration
-    x: longitude|station_configuration
+    y: latitude|header_table
+    x: longitude|header_table
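
For context, the y and x entries use the column|table convention that qualifies a column name with its source table. A minimal sketch of how such a block could be parsed into the space_columns object that the rewritten sort() consumes below; the SpaceColumns dataclass here is illustrative, the actual model in cdsobs may differ:

```python
from dataclasses import dataclass

import yaml


@dataclass
class SpaceColumns:
    """Latitude (y) and longitude (x) column names, optionally
    qualified with their source table as "name|table"."""

    y: str
    x: str


config = yaml.safe_load(
    """
space_columns:
  y: latitude|header_table
  x: longitude|header_table
"""
)
space_columns = SpaceColumns(**config["space_columns"])
assert space_columns.y == "latitude|header_table"
```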
16 changes: 4 additions & 12 deletions cdsobs/ingestion/api.py
@@ -1,6 +1,6 @@
 from importlib import import_module
 from pathlib import Path
-from typing import List, Tuple
+from typing import List

 import numpy
 import pandas
@@ -151,20 +151,12 @@ def cast_to_descriptions(
     return data_renamed


-def _get_latlon_names(data: pandas.DataFrame) -> Tuple[str, str]:
-    if "longitude" in data:
-        latname = "latitude"
-        lonname = "longitude"
-    else:
-        latname = "latitude|station_configuration"
-        lonname = "longitude|station_configuration"
-    return latname, lonname
-
-
 def sort(partition: DatasetPartition) -> DatasetPartition:
     """Sort data of a partition."""
     logger.info("Sorting partition data")
-    latname, lonname = _get_latlon_names(partition.data)
+    space_columns = partition.dataset_metadata.space_columns
+    latname = space_columns.y
+    lonname = space_columns.x
     partition.data.sort_values(
         by=["report_timestamp", latname, lonname], kind="mergesort", inplace=True
     )
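
The rewritten sort no longer guesses the coordinate names from the data; it takes them from the dataset metadata. A small self-contained sketch of the resulting sort call, using hypothetical data with header_table-qualified coordinates (the stable mergesort keeps the relative order of rows that tie on all keys):

```python
import pandas

# Hypothetical partition data; column names follow the "name|table" convention.
data = pandas.DataFrame(
    {
        "report_timestamp": ["2000-01-02", "2000-01-01", "2000-01-01"],
        "latitude|header_table": [10.0, 20.0, 10.0],
        "longitude|header_table": [5.0, 5.0, 5.0],
    }
)
# space_columns.y / space_columns.x as read from the config above.
latname, lonname = "latitude|header_table", "longitude|header_table"
data.sort_values(
    by=["report_timestamp", latname, lonname], kind="mergesort", inplace=True
)
print(data)
```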
18 changes: 15 additions & 3 deletions cdsobs/ingestion/readers/cuon.py
@@ -131,6 +131,8 @@ def _process_table(
         "eda_spread@errstat",
         "processing_level",
         "location_method",
+        "source_id",
+        "crs",
     ]
     file_vars = [
         fv
@@ -141,7 +143,11 @@
         logger.debug(f"Reading variable {variable}")
         selector = slices[variable]
         # dropping string dims - not necessary for dataframes
-        fields = [f for f in hfile[table_name] if "string" not in f]
+        fields = [
+            f
+            for f in hfile[table_name]
+            if "string" not in f and f not in vals_to_exclude
+        ]
         data: dict[str, numpy.ndarray] = {
             field: _get_field_data(field, hfile, selector, table_name)
             for field in fields
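
The point of filtering the field list before reading is that excluded variables are never pulled from disk at all. A minimal sketch of the same pattern with plain h5py; the file name, table name, selector, and the exclusion subset shown are hypothetical stand-ins for what _process_table uses:

```python
import h5py
import numpy

# Subset of the variables agreed with UNIVIE to be skipped at read time.
vals_to_exclude = {"source_id", "crs", "eda_spread@errstat"}

with h5py.File("station.nc", "r") as hfile:  # hypothetical file
    table_name = "observations_table"
    selector = slice(0, 1000)
    fields = [
        f
        for f in hfile[table_name]
        if "string" not in f and f not in vals_to_exclude
    ]
    # Only the retained fields are ever read into memory.
    data: dict[str, numpy.ndarray] = {
        field: hfile[table_name][field][selector] for field in fields
    }
```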
@@ -409,7 +415,13 @@ def _fix_table_data(
 ):
     # the name in station_configuration
     if table_name == "header_table":
-        vars_to_drop = ["station_name", "platform_sub_type", "platform_type"]
+        vars_to_drop = [
+            "station_name",
+            "platform_sub_type",
+            "platform_type",
+            "station_type",
+            "crs",
+        ]
         table_data = table_data.drop(vars_to_drop, axis=1, errors="ignore")
     # Check that observation id is unique and fix if not
     if table_name == "observations_table":
@@ -432,7 +444,7 @@
         table_data = table_data.drop_duplicates(
             subset=["primary_id", "record_number"], ignore_index=True
         )
-        table_data = table_data.drop(["latitude", "longitude"])
+        table_data = table_data.drop(["latitude", "longitude"], axis=1)
     # Check primary keys can be used to build a unique index
     primary_keys = table_definition.primary_keys
     if table_name in [
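
The axis=1 addition here fixes a real bug: pandas DataFrame.drop defaults to axis=0, so the old call looked for index labels named "latitude" and "longitude" instead of columns and raised a KeyError. A short demonstration with hypothetical data:

```python
import pandas

table_data = pandas.DataFrame(
    {"latitude": [10.0], "longitude": [5.0], "observed_variable": ["ta"]}
)

# Before the fix: axis defaults to 0 (rows), so pandas searches the
# *index* for these labels and raises a KeyError.
try:
    table_data.drop(["latitude", "longitude"])
except KeyError as err:
    print(err)

# After the fix: axis=1 drops the columns as intended.
table_data = table_data.drop(["latitude", "longitude"], axis=1)
print(table_data.columns.tolist())  # ['observed_variable']
```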
