From e9d21ce112486b208f301c76ff6b690daec577c9 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Wed, 17 Jul 2024 14:09:38 +0000 Subject: [PATCH 1/9] extract interface, make all other properties private --- pyaerocom/io/gridded_model_reader.py | 101 +++++++++++++++++++++ pyaerocom/io/mscw_ctm/reader.py | 127 ++++++++++++++------------- tests/colocation/test_colocator.py | 2 +- tests/io/mscw_ctm/test_reader.py | 54 ++++++------ 4 files changed, 196 insertions(+), 88 deletions(-) create mode 100644 pyaerocom/io/gridded_model_reader.py diff --git a/pyaerocom/io/gridded_model_reader.py b/pyaerocom/io/gridded_model_reader.py new file mode 100644 index 000000000..355c853d4 --- /dev/null +++ b/pyaerocom/io/gridded_model_reader.py @@ -0,0 +1,101 @@ +import abc + +from pyaerocom.griddeddata import GriddedData + + +class GriddedModelReader(abc.ABC): + """Abstract base class for griddel model reader used for collocation""" + + @property + @abc.abstractmethod + def data_id(self) -> str: + """ + Data ID of dataset + """ + pass + + @property + @abc.abstractmethod + def ts_type(self): + """ + Frequency of time dimension of current data file. Since a reader + might have multiple ts_types, this value is volatile. + + Raises + ------ + AttributeError + if :attr:`filename` is not set. + + Returns + ------- + str + current ts_type. + + """ + pass + + @property + @abc.abstractmethod + def ts_types(self): + """ + List of available frequencies + + Raises + ------ + AttributeError + if :attr:`data_dir` is not set. 
+ + Returns + ------- + list + list of available frequencies + + """ + pass + + @property + @abc.abstractmethod + def years_avail(self) -> list: + """ + Years available in dataset + """ + pass + + @property + @abc.abstractmethod + def vars_provided(self): + """Variables provided by this dataset""" + pass + + @abc.abstractmethod + def has_var(self, var_name): + """Check if variable is supported + + Parameters + ---------- + var_name : str + variable to be checked + + Returns + ------- + bool + """ + pass + + @abc.abstractmethod + def read_var(self, var_name, ts_type=None, **kwargs) -> GriddedData: + """Load data for given variable. + + Parameters + ---------- + var_name : str + Variable to be read + ts_type : str + Temporal resolution of data to read. Supported are + "hourly", "daily", "monthly" , "yearly". + + Returns + ------- + GriddedData + """ + pass diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index 1a252ef52..3b6f3df34 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -4,6 +4,7 @@ import os import re import warnings +from collections import namedtuple import numpy as np import xarray as xr @@ -11,6 +12,7 @@ from pyaerocom import const from pyaerocom.exceptions import VarNotAvailableError from pyaerocom.griddeddata import GriddedData +from pyaerocom.io.gridded_model_reader import GriddedModelReader from pyaerocom.units_helpers import UALIASES from .additional_variables import ( @@ -45,7 +47,7 @@ logger = logging.getLogger(__name__) -class ReadMscwCtm: +class ReadMscwCtm(GriddedModelReader): """ Class for reading model output from the EMEP MSC-W chemical transport model. 
@@ -174,15 +176,17 @@ class ReadMscwCtm: #: pattern for 4-digit years for 19XX and 20XX used for trend subdirectories YEAR_PATTERN = r".*((?:19|20)\d\d).*" + class _PrivateFields: + filename = None + filedata = None + filepaths = None + files = None + data_dir = None + def __init__(self, data_id=None, data_dir=None, **kwargs): - self._data_dir = None # opened dataset (for performance boost), will be reset if data_dir is # changed - self._filename = None - self._filedata = None - self._filepaths = None - - self._files = None + self._private = self._PrivateFields() self.var_map = emep_variables() if "emep_vars" in kwargs: @@ -196,14 +200,14 @@ def __init__(self, data_id=None, data_dir=None, **kwargs): if not isinstance(data_dir, str) or not os.path.exists(data_dir): raise FileNotFoundError(f"{data_dir}") - self.data_dir = data_dir + self._data_dir = data_dir - self.data_id = data_id - self._filename = self.DEFAULT_FILE_NAME + self._data_id = data_id + self._private.filename = self.DEFAULT_FILE_NAME - def search_all_files(self): + def _search_all_files(self): folders = self._get_trend_folders_from_folder() - self.filepaths = self._get_files_from_folders(folders) + self._filepaths = self._get_files_from_folders(folders) def _get_files_from_folders(self, folders): files = [] @@ -234,7 +238,7 @@ def _get_trend_folders_from_folder(self): List of the names of the subfolder """ - dd = self.data_dir + dd = self._data_dir mscwfiles = [] for freq in self.FREQ_CODES.keys(): @@ -277,7 +281,7 @@ def _get_yrs_from_filepaths(self) -> list[str]: :return: list of years as str """ - fps = self.filepaths + fps = self._filepaths yrs = [] for fp in fps: try: @@ -331,67 +335,71 @@ def _clean_filepaths(self, filepaths, yrs, ts_type): return [d for _, d in sorted(zip(found_yrs, clean_paths))] @property - def data_dir(self): + def data_id(self): + return self._data_id + + @property + def _data_dir(self): """ Directory containing netcdf files """ - if self._data_dir is None: + if 
self._private.data_dir is None: raise AttributeError(f"data_dir needs to be set before accessing") - return self._data_dir + return self._private.data_dir - @data_dir.setter - def data_dir(self, val): + @_data_dir.setter + def _data_dir(self, val): if val is None: raise ValueError(f"Data dir {val} needs to be a dictionary or a file") if not os.path.isdir(val): raise FileNotFoundError(val) - self._data_dir = val - self._filedata = None - self.search_all_files() - self._files = self.filepaths + self._private.data_dir = val + self._private.filedata = None + self._search_all_files() + self._private.files = self._filepaths @property - def filename(self): + def _filename(self): """ Name of latest netcdf file read """ - return self._filename + return self._private.filename - @filename.setter - def filename(self, val): + @_filename.setter + def _filename(self, val): """ Name of netcdf file """ if not isinstance(val, str): # pragma: no cover raise ValueError("needs str") - elif val == self._filename: + elif val == self._private.filename: return - self._filename = val - self._filedata = None + self._private.filename = val + self._private.filedata = None @property - def filepaths(self): + def _filepaths(self): """ Path to data file """ - if self.data_dir is None and self._filepaths is None: # pragma: no cover + if self._data_dir is None and self._filepaths is None: # pragma: no cover raise AttributeError("data_dir or filepaths needs to be set before accessing") - return self._filepaths + return self._private.filepaths - @filepaths.setter - def filepaths(self, value): + @_filepaths.setter + def _filepaths(self, value): if not isinstance(value, list): # pragma: no cover raise ValueError("needs to be list of strings") - self._filepaths = value + self._private.filepaths = value @property - def filedata(self): + def _filedata(self): """ Loaded netcdf file (:class:`xarray.Dataset`) """ - if self._filedata is None: - self.open_file() - return self._filedata + if 
self._private.filedata is None: + self._open_file() + return self._private.filedata def _check_files_in_data_dir(self, data_dir): """ @@ -445,7 +453,7 @@ def ts_type(self): current ts_type. """ - return self.ts_type_from_filename(self.filename) + return self._ts_type_from_filename(self._filename) @property def ts_types(self): @@ -463,11 +471,11 @@ def ts_types(self): list of available frequencies """ - if not isinstance(self._files, list): + if not isinstance(self._private.files, list): raise AttributeError("please set data_dir first") tsts = [] - for file in self._files: - tsts.append(self.ts_type_from_filename(file)) + for file in self._private.files: + tsts.append(self._ts_type_from_filename(file)) return list(set(tsts)) @property @@ -475,7 +483,6 @@ def years_avail(self): """ Years available in loaded dataset """ - data = self.filepaths years = self._get_yrs_from_filepaths() years = list(np.unique(years)) @@ -486,7 +493,7 @@ def vars_provided(self): """Variables provided by this dataset""" return list(self.var_map) + list(self.AUX_REQUIRES) - def open_file(self): + def _open_file(self): """ Open current netcdf file @@ -496,19 +503,19 @@ def open_file(self): Dict with years as keys and Datasets as items """ - fps = self.filepaths + fps = self._filepaths ds = {} yrs = self._get_yrs_from_filepaths() - ts_type = self.ts_type_from_filename(self.filename) + ts_type = self._ts_type_from_filename(self._filename) fps = self._clean_filepaths(fps, yrs, ts_type) if len(fps) > 1 and ts_type == "hourly": raise ValueError(f"ts_type {ts_type} can not be hourly when using multiple years") logger.info(f"Opening {fps}") ds = xr.open_mfdataset(fps, chunks={"time": 24}) - self._filedata = ds + self._private.filedata = ds return ds @@ -535,7 +542,7 @@ def has_var(self, var_name): return True return False - def ts_type_from_filename(self, filename): + def _ts_type_from_filename(self, filename): """ Get ts_type from filename @@ -558,7 +565,7 @@ def ts_type_from_filename(self, 
filename): return tstype raise ValueError(f"Failed to retrieve ts_type from filename {filename}") - def filename_from_ts_type(self, ts_type): + def _filename_from_ts_type(self, ts_type): """ Infer file name of data based on input ts_type @@ -666,14 +673,14 @@ def read_var(self, var_name, ts_type=None, **kwargs): var = const.VARS[var_name] var_name_aerocom = var.var_name_aerocom - if self.data_dir is None: # pragma: no cover + if self._data_dir is None: # pragma: no cover raise ValueError("data_dir must be set before reading.") - elif self.filename is None and ts_type is None: # pragma: no cover + elif self._filename is None and ts_type is None: # pragma: no cover raise ValueError("please specify ts_type") elif ts_type is not None: # filename and ts_type are set. update filename if ts_type suggests # that current file has different resolution - self.filename = self.filename_from_ts_type(ts_type) + self._filename = self._filename_from_ts_type(ts_type) ts_type = self.ts_type @@ -701,8 +708,8 @@ def read_var(self, var_name, ts_type=None, **kwargs): # At this point a GriddedData object with name gridded should exist - gridded.metadata["data_id"] = self.data_id - gridded.metadata["from_files"] = self.filepaths + gridded.metadata["data_id"] = self._data_id + gridded.metadata["from_files"] = self._filepaths # Remove unneccessary metadata. Better way to do this? 
for metadata in ["current_date_first", "current_date_last"]: @@ -737,22 +744,22 @@ def _read_var_from_file(self, var_name_aerocom, ts_type): emep_var = self.var_map[var_name_aerocom] try: - filedata = self.filedata + filedata = self._filedata data = filedata[emep_var] except KeyError: raise VarNotAvailableError( - f"{var_name_aerocom} ({emep_var}) not available in {self.filename}" + f"{var_name_aerocom} ({emep_var}) not available in {self._filename}" ) data.attrs["long_name"] = var_name_aerocom data.time.attrs["long_name"] = "time" data.time.attrs["standard_name"] = "time" prefix = emep_var.split("_")[0] - data.attrs["units"] = self.preprocess_units(data.units, prefix) + data.attrs["units"] = self._preprocess_units(data.units, prefix) return data @staticmethod - def preprocess_units(units, prefix): + def _preprocess_units(units, prefix): """ Update units for certain variables diff --git a/tests/colocation/test_colocator.py b/tests/colocation/test_colocator.py index 1431ffd7d..27ba371ca 100644 --- a/tests/colocation/test_colocator.py +++ b/tests/colocation/test_colocator.py @@ -371,7 +371,7 @@ def test_colocator_instantiate_gridded_reader_model_data_dir(setup, path_emep): col = Colocator(col_stp) r = col._instantiate_gridded_reader(what="model") assert isinstance(r, ReadMscwCtm) - assert r.data_dir == model_data_dir + assert r._data_dir == model_data_dir assert r.data_id == model_id diff --git a/tests/io/mscw_ctm/test_reader.py b/tests/io/mscw_ctm/test_reader.py index 478f27a13..7f144c90e 100644 --- a/tests/io/mscw_ctm/test_reader.py +++ b/tests/io/mscw_ctm/test_reader.py @@ -148,7 +148,7 @@ def test_ReadMscwCtm__get_year_from_nc(data_dir: str): def test_ReadMscwCtm__init__(data_dir: str): reader = ReadMscwCtm("EMEP_2017", data_dir) assert getattr(reader, "data_id") == "EMEP_2017" - assert getattr(reader, "data_dir") == data_dir + assert getattr(reader, "_data_dir") == data_dir def test_ReadMscwCtm__init___error(): @@ -160,8 +160,8 @@ def 
test_ReadMscwCtm__init___error(): def test_ReadMscwCtm_data_dir(data_dir: str): reader = ReadMscwCtm() - reader.data_dir = data_dir - assert Path(reader.data_dir) == Path(data_dir) + reader._data_dir = data_dir + assert Path(reader._data_dir) == Path(data_dir) @pytest.mark.parametrize( @@ -174,7 +174,7 @@ def test_ReadMscwCtm_data_dir(data_dir: str): def test_ReadMscwCtm_data_dir_error(value, exception, error: str): reader = ReadMscwCtm(value) with pytest.raises(exception) as e: - reader.data_dir = value + reader._data_dir = value assert str(e.value) == error @@ -276,12 +276,12 @@ def test_ReadMscwCtm_data(data_dir: str): def test_ReadMscwCtm_directory(data_dir: str): reader = ReadMscwCtm(data_dir=data_dir) - assert reader.data_dir == data_dir + assert reader._data_dir == data_dir vars_provided = reader.vars_provided assert "vmro3" in vars_provided assert "concpm10" in vars_provided assert "concno2" in vars_provided - paths = reader.filepaths + paths = reader._filepaths assert len(paths) == 3 @@ -295,12 +295,12 @@ def test_ReadMscwCtm_directory(data_dir: str): ], ) def test_ReadMscwCtm_ts_type_from_filename(reader, filename, ts_type): - assert reader.ts_type_from_filename(filename) == ts_type + assert reader._ts_type_from_filename(filename) == ts_type def test_ReadMscwCtm_ts_type_from_filename_error(reader): with pytest.raises(ValueError) as e: - reader.ts_type_from_filename("blaaa") + reader._ts_type_from_filename("blaaa") assert str(e.value) == "Failed to retrieve ts_type from filename blaaa" @@ -314,12 +314,12 @@ def test_ReadMscwCtm_ts_type_from_filename_error(reader): ], ) def test_ReadMscwCtm_filename_from_ts_type(reader, filename, ts_type): - assert reader.filename_from_ts_type(ts_type) == filename + assert reader._filename_from_ts_type(ts_type) == filename def test_ReadMscwCtm_filename_from_ts_type_error(reader): with pytest.raises(ValueError) as e: - reader.filename_from_ts_type("blaaa") + reader._filename_from_ts_type("blaaa") assert str(e.value) == 
"unknown ts_type=blaaa" @@ -331,15 +331,15 @@ def test_ReadMscwCtm_years_avail(data_dir: str): def test_ReadMscwCtm_preprocess_units(): units = "" prefix = "AOD" - assert ReadMscwCtm().preprocess_units(units, prefix) == "1" + assert ReadMscwCtm()._preprocess_units(units, prefix) == "1" def test_ReadMscwCtm_open_file(data_dir: str): reader = ReadMscwCtm() with pytest.raises(AttributeError): - reader.open_file() - reader.data_dir = data_dir - data = reader.open_file() + reader._open_file() + reader._data_dir = data_dir + data = reader._open_file() assert isinstance(data, xr.Dataset) assert reader._filedata is data @@ -490,7 +490,7 @@ def test_read_emep_clean_filepaths(data_path: Path, year, years: list[int], freq reader = ReadMscwCtm(data_dir=str(data_path / year)) tst = reader.FREQ_CODES[freq] - filepaths = reader.filepaths + filepaths = reader._filepaths cleaned_paths = reader._clean_filepaths(filepaths, years, tst) assert len(cleaned_paths) == len(years) @@ -517,7 +517,7 @@ def test_read_emep_clean_filepaths(data_path: Path, year, years: list[int], freq def test_read_emep_clean_filepaths_error(data_path: Path, years, freq: str, error: str): reader = ReadMscwCtm(data_dir=str(data_path)) tst = reader.FREQ_CODES[freq] - filepaths = reader.filepaths + filepaths = reader._filepaths with pytest.raises(ValueError) as e: reader._clean_filepaths(filepaths, years, tst) @@ -535,7 +535,7 @@ def test_read_emep_clean_filepaths_error(data_path: Path, years, freq: str, erro def test_read_emep_wrong_filenames(data_path: Path, freq: str, wrong_name: str): reader = ReadMscwCtm(data_dir=str(data_path)) tst = reader.FREQ_CODES[freq] - filepaths = reader.filepaths + filepaths = reader._filepaths years = reader._get_yrs_from_filepaths() cleaned_paths = reader._clean_filepaths(filepaths, years, tst) @@ -559,7 +559,7 @@ def test_read_emep_wrong_filenames(data_path: Path, freq: str, wrong_name: str): def test_read_emep_wrong_tst(data_path: Path, wrong_tst: str): reader = 
ReadMscwCtm(data_dir=str(data_path)) with pytest.raises(ValueError) as e: - filepaths = reader.filepaths + filepaths = reader._filepaths wrong_path = Path(filepaths[0]).with_name(f"Base_{wrong_tst}.nc") reader._get_tst_from_file(str(wrong_path)) @@ -570,7 +570,7 @@ def test_read_emep_LF_tst(tmp_path: Path): data_path = emep_data_path(tmp_path, "month", vars_and_units={"prmm": "mm"}) reader = ReadMscwCtm(data_dir=str(data_path)) with pytest.raises(ValueError) as e: - filepaths = reader.filepaths + filepaths = reader._filepaths wrong_path = Path(filepaths[0]).with_name(f"Base_LF_month.nc") reader._get_tst_from_file(str(wrong_path)) @@ -580,7 +580,7 @@ def test_read_emep_LF_tst(tmp_path: Path): def test_read_emep_year_defined_twice(tmp_path: Path): data_path = emep_data_path(tmp_path, "day", vars_and_units={"prmm": "mm"}) reader = ReadMscwCtm(data_dir=str(data_path)) - filepaths = reader.filepaths + filepaths = reader._filepaths wrong_path = Path(filepaths[0]).with_name(f"Base_day.nc") filepaths.append(str(wrong_path)) new_yrs = reader._get_yrs_from_filepaths() @@ -600,8 +600,8 @@ def test_read_emep_year_defined_twice(tmp_path: Path): @pytest.mark.parametrize("vars_and_units", [{"prmm": "mm"}]) def test_read_emep_multiple_dirs(data_path: Path, year: str, freq: str, num: int): reader = ReadMscwCtm(data_dir=str(data_path / year)) - assert len(reader.filepaths) == num - assert all(freq in Path(file).stem for file in reader.filepaths) + assert len(reader._filepaths) == num + assert all(freq in Path(file).stem for file in reader._filepaths) tst = reader.FREQ_CODES[freq] data = reader.read_var("prmm", ts_type=tst) @@ -628,15 +628,15 @@ def test_read_emep_multiple_dirs_hour_error(tmp_path: Path): def test_search_all_files(data_path: Path, year: str, num: int): reader = ReadMscwCtm() with pytest.raises(AttributeError): - reader.search_all_files() + reader._search_all_files() with pytest.raises(AttributeError): - reader.filepaths + reader._filepaths reader._data_dir = 
str(data_path / year) - reader.search_all_files() - assert len(reader.filepaths) == num + reader._search_all_files() + assert len(reader._filepaths) == num @pytest.mark.parametrize( @@ -654,7 +654,7 @@ def test_ts_types(data_path: Path, year: str, freq: list[str]): with pytest.raises(AttributeError): reader.ts_types - reader.data_dir = str(data_path / year) + reader._data_dir = str(data_path / year) ts_types = reader.ts_types assert len(ts_types) == len(freq) From bd2c18e1979c63d0e85dff7b2c13574b5cd8dfa1 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Wed, 17 Jul 2024 14:31:49 +0000 Subject: [PATCH 2/9] remove ts_type from interface --- pyaerocom/io/gridded_model_reader.py | 29 +++++----------------------- pyaerocom/io/mscw_ctm/reader.py | 4 ++-- tests/io/mscw_ctm/test_reader.py | 4 ++-- 3 files changed, 9 insertions(+), 28 deletions(-) diff --git a/pyaerocom/io/gridded_model_reader.py b/pyaerocom/io/gridded_model_reader.py index 355c853d4..bbb73d07f 100644 --- a/pyaerocom/io/gridded_model_reader.py +++ b/pyaerocom/io/gridded_model_reader.py @@ -1,4 +1,5 @@ import abc +from typing import Iterator from pyaerocom.griddeddata import GriddedData @@ -16,27 +17,7 @@ def data_id(self) -> str: @property @abc.abstractmethod - def ts_type(self): - """ - Frequency of time dimension of current data file. Since a reader - might have multiple ts_types, this value is volatile. - - Raises - ------ - AttributeError - if :attr:`filename` is not set. - - Returns - ------- - str - current ts_type. 
- - """ - pass - - @property - @abc.abstractmethod - def ts_types(self): + def ts_types(self) -> Iterator[str]: """ List of available frequencies @@ -55,7 +36,7 @@ def ts_types(self): @property @abc.abstractmethod - def years_avail(self) -> list: + def years_avail(self) -> Iterator[str]: """ Years available in dataset """ @@ -63,12 +44,12 @@ def years_avail(self) -> list: @property @abc.abstractmethod - def vars_provided(self): + def vars_provided(self) -> Iterator[str]: """Variables provided by this dataset""" pass @abc.abstractmethod - def has_var(self, var_name): + def has_var(self, var_name) -> bool: """Check if variable is supported Parameters diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index 3b6f3df34..eb21ce3be 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -438,7 +438,7 @@ def _check_files_in_data_dir(self, data_dir): return matches @property - def ts_type(self): + def _ts_type(self): """ Frequency of time dimension of current data file @@ -682,7 +682,7 @@ def read_var(self, var_name, ts_type=None, **kwargs): # that current file has different resolution self._filename = self._filename_from_ts_type(ts_type) - ts_type = self.ts_type + ts_type = self._ts_type arr = self._load_var(var_name_aerocom, ts_type) if arr.units in UALIASES: diff --git a/tests/io/mscw_ctm/test_reader.py b/tests/io/mscw_ctm/test_reader.py index 7f144c90e..7e5b38ad2 100644 --- a/tests/io/mscw_ctm/test_reader.py +++ b/tests/io/mscw_ctm/test_reader.py @@ -192,7 +192,7 @@ def test__ReadMscwCtm__check_files_in_data_dir_error(): def test_ReadMscwCtm_ts_type(): reader = ReadMscwCtm() - assert reader.ts_type == "daily" + assert reader._ts_type == "daily" def test_ReadMscwCtm_var_map(): @@ -211,7 +211,7 @@ def test_ReadMscwCtm_read_var(var_name: str, ts_type: str, data_dir: str): if ts_type is not None: assert data.ts_type == ts_type assert data.ts_type is not None - assert data.ts_type == reader.ts_type + assert 
data.ts_type == reader._ts_type @pytest.mark.parametrize( From ac995bc8c090d75b11f4e94b4f2e7a11012ec7a8 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Wed, 17 Jul 2024 15:17:31 +0000 Subject: [PATCH 3/9] all gridded model readers use same interface --- pyaerocom/io/cams2_83/reader.py | 22 +++++++++++++++++++++- pyaerocom/io/readgridded.py | 3 ++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pyaerocom/io/cams2_83/reader.py b/pyaerocom/io/cams2_83/reader.py index 375c47e4b..6f35782a2 100644 --- a/pyaerocom/io/cams2_83/reader.py +++ b/pyaerocom/io/cams2_83/reader.py @@ -14,6 +14,7 @@ from pyaerocom import const from pyaerocom.griddeddata import GriddedData from pyaerocom.io.cams2_83.models import ModelData, ModelName, RunType +from pyaerocom.io.gridded_model_reader import GriddedModelReader # from pyaerocom.units_helpers import UALIASES @@ -220,7 +221,7 @@ def check_files(paths: list[Path]) -> list[Path]: return new_paths -class ReadCAMS2_83: +class ReadCAMS2_83(GriddedModelReader): FREQ_CODES = dict(hour="hourly", day="daily", month="monthly", fullrun="yearly") REVERSE_FREQ_CODES = {val: key for key, val in FREQ_CODES.items()} @@ -291,6 +292,20 @@ def data_id(self, val): self.model = ModelName[model] self.forecast_day = int(day) + @property + def years_avail(self): + return np.unique( + self.daterange.values.astype("datetime64[Y]").astype("int") + 1970 + ).astype("str") + + @property + def ts_types(self): + return self.REVERSE_FREQ_CODES.keys() + + @property + def vars_provided(self): + return AEROCOM_NAMES.values() + @property def run_type(self): if self._run_type is None: @@ -443,6 +458,11 @@ def read_var(self, var_name: str, ts_type: str | None = None, **kwargs) -> Gridd data_id = "CAMS2-83.EMEP.day0.AN" reader = ReadCAMS2_83(data_dir=data_dir, data_id=data_id) reader.daterange = ("2021-12-01", "2021-12-04") + print( + np.unique(reader.daterange.values.astype("datetime64[Y]").astype("int") + 1970).astype( + "str" + ) + ) 
print(reader.filepaths) # dates = ("2021-12-01", "2021-12-04") diff --git a/pyaerocom/io/readgridded.py b/pyaerocom/io/readgridded.py index a514a7283..f0d4c8ddc 100755 --- a/pyaerocom/io/readgridded.py +++ b/pyaerocom/io/readgridded.py @@ -45,6 +45,7 @@ subtract_cubes, ) from pyaerocom.io.fileconventions import FileConventionRead +from pyaerocom.io.gridded_model_reader import GriddedModelReader from pyaerocom.io.helpers import add_file_to_log from pyaerocom.io.iris_io import concatenate_iris_cubes, load_cubes_custom from pyaerocom.metastandards import AerocomDataID @@ -54,7 +55,7 @@ logger = logging.getLogger(__name__) -class ReadGridded: +class ReadGridded(GriddedModelReader): """Class for reading gridded files using AeroCom file conventions Attributes From c6b4e2edf79ac100c0a8510fa29d7efed08933c1 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Thu, 18 Jul 2024 11:13:46 +0000 Subject: [PATCH 4/9] config files --- pyaerocom/io/uemep/__init__.py | 0 pyaerocom/io/uemep/uemep_variables.ini | 6 ++++++ 2 files changed, 6 insertions(+) create mode 100644 pyaerocom/io/uemep/__init__.py create mode 100644 pyaerocom/io/uemep/uemep_variables.ini diff --git a/pyaerocom/io/uemep/__init__.py b/pyaerocom/io/uemep/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pyaerocom/io/uemep/uemep_variables.ini b/pyaerocom/io/uemep/uemep_variables.ini new file mode 100644 index 000000000..8c701f52a --- /dev/null +++ b/pyaerocom/io/uemep/uemep_variables.ini @@ -0,0 +1,6 @@ +[uemep_variables] +concno2 = no2_concentration +conco3 = o3_concentration +concpm25 = pm25_concentration +concpm10 = pm10_concentration +prmm = precipitation From e1f651ae3c2e324af21737989fbd3228576b8643 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Tue, 23 Jul 2024 09:42:39 +0000 Subject: [PATCH 5/9] rename to keep same naming-convention as for readungridded --- pyaerocom/io/cams2_83/reader.py | 2 +- pyaerocom/io/mscw_ctm/reader.py | 2 +- pyaerocom/io/readgridded.py | 2 +- 
pyaerocom/io/{gridded_model_reader.py => readgriddedbase.py} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename pyaerocom/io/{gridded_model_reader.py => readgriddedbase.py} (100%) diff --git a/pyaerocom/io/cams2_83/reader.py b/pyaerocom/io/cams2_83/reader.py index 6f35782a2..8771ee5d0 100644 --- a/pyaerocom/io/cams2_83/reader.py +++ b/pyaerocom/io/cams2_83/reader.py @@ -14,7 +14,7 @@ from pyaerocom import const from pyaerocom.griddeddata import GriddedData from pyaerocom.io.cams2_83.models import ModelData, ModelName, RunType -from pyaerocom.io.gridded_model_reader import GriddedModelReader +from pyaerocom.io.readgriddedbase import GriddedModelReader # from pyaerocom.units_helpers import UALIASES diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index eb21ce3be..434e56993 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -12,7 +12,7 @@ from pyaerocom import const from pyaerocom.exceptions import VarNotAvailableError from pyaerocom.griddeddata import GriddedData -from pyaerocom.io.gridded_model_reader import GriddedModelReader +from pyaerocom.io.readgriddedbase import GriddedModelReader from pyaerocom.units_helpers import UALIASES from .additional_variables import ( diff --git a/pyaerocom/io/readgridded.py b/pyaerocom/io/readgridded.py index f0d4c8ddc..ac595358f 100755 --- a/pyaerocom/io/readgridded.py +++ b/pyaerocom/io/readgridded.py @@ -45,7 +45,7 @@ subtract_cubes, ) from pyaerocom.io.fileconventions import FileConventionRead -from pyaerocom.io.gridded_model_reader import GriddedModelReader +from pyaerocom.io.readgriddedbase import GriddedModelReader from pyaerocom.io.helpers import add_file_to_log from pyaerocom.io.iris_io import concatenate_iris_cubes, load_cubes_custom from pyaerocom.metastandards import AerocomDataID diff --git a/pyaerocom/io/gridded_model_reader.py b/pyaerocom/io/readgriddedbase.py similarity index 100% rename from pyaerocom/io/gridded_model_reader.py rename to 
pyaerocom/io/readgriddedbase.py From f234f73b27dfc637fa6c0b8f83deaff809b95253 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Tue, 23 Jul 2024 09:49:26 +0000 Subject: [PATCH 6/9] rename again, since readungriddedbase is not a baseclass of readungridded, the old convention is too confusing --- pyaerocom/io/cams2_83/reader.py | 2 +- pyaerocom/io/{readgriddedbase.py => gridded_reader.py} | 0 pyaerocom/io/mscw_ctm/reader.py | 2 +- pyaerocom/io/readgridded.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename pyaerocom/io/{readgriddedbase.py => gridded_reader.py} (100%) diff --git a/pyaerocom/io/cams2_83/reader.py b/pyaerocom/io/cams2_83/reader.py index 8771ee5d0..dfa80dcf6 100644 --- a/pyaerocom/io/cams2_83/reader.py +++ b/pyaerocom/io/cams2_83/reader.py @@ -14,7 +14,7 @@ from pyaerocom import const from pyaerocom.griddeddata import GriddedData from pyaerocom.io.cams2_83.models import ModelData, ModelName, RunType -from pyaerocom.io.readgriddedbase import GriddedModelReader +from pyaerocom.io.gridded_reader import GriddedModelReader # from pyaerocom.units_helpers import UALIASES diff --git a/pyaerocom/io/readgriddedbase.py b/pyaerocom/io/gridded_reader.py similarity index 100% rename from pyaerocom/io/readgriddedbase.py rename to pyaerocom/io/gridded_reader.py diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index 434e56993..3d2419cec 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -12,7 +12,7 @@ from pyaerocom import const from pyaerocom.exceptions import VarNotAvailableError from pyaerocom.griddeddata import GriddedData -from pyaerocom.io.readgriddedbase import GriddedModelReader +from pyaerocom.io.gridded_reader import GriddedModelReader from pyaerocom.units_helpers import UALIASES from .additional_variables import ( diff --git a/pyaerocom/io/readgridded.py b/pyaerocom/io/readgridded.py index ac595358f..e7bc67306 100755 --- a/pyaerocom/io/readgridded.py +++ b/pyaerocom/io/readgridded.py 
@@ -45,7 +45,7 @@ subtract_cubes, ) from pyaerocom.io.fileconventions import FileConventionRead -from pyaerocom.io.readgriddedbase import GriddedModelReader +from pyaerocom.io.gridded_reader import GriddedModelReader from pyaerocom.io.helpers import add_file_to_log from pyaerocom.io.iris_io import concatenate_iris_cubes, load_cubes_custom from pyaerocom.metastandards import AerocomDataID From b489e8b40e1a1c4ffb4ef07319cfa5f3ff0901aa Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Tue, 23 Jul 2024 09:50:46 +0000 Subject: [PATCH 7/9] naming baseclass GriddedReader since it is useful for both gridded satellite and model data --- pyaerocom/io/cams2_83/reader.py | 4 ++-- pyaerocom/io/gridded_reader.py | 2 +- pyaerocom/io/mscw_ctm/reader.py | 4 ++-- pyaerocom/io/readgridded.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyaerocom/io/cams2_83/reader.py b/pyaerocom/io/cams2_83/reader.py index dfa80dcf6..d57338263 100644 --- a/pyaerocom/io/cams2_83/reader.py +++ b/pyaerocom/io/cams2_83/reader.py @@ -14,7 +14,7 @@ from pyaerocom import const from pyaerocom.griddeddata import GriddedData from pyaerocom.io.cams2_83.models import ModelData, ModelName, RunType -from pyaerocom.io.gridded_reader import GriddedModelReader +from pyaerocom.io.gridded_reader import GriddedReader # from pyaerocom.units_helpers import UALIASES @@ -221,7 +221,7 @@ def check_files(paths: list[Path]) -> list[Path]: return new_paths -class ReadCAMS2_83(GriddedModelReader): +class ReadCAMS2_83(GriddedReader): FREQ_CODES = dict(hour="hourly", day="daily", month="monthly", fullrun="yearly") REVERSE_FREQ_CODES = {val: key for key, val in FREQ_CODES.items()} diff --git a/pyaerocom/io/gridded_reader.py b/pyaerocom/io/gridded_reader.py index bbb73d07f..38d839d58 100644 --- a/pyaerocom/io/gridded_reader.py +++ b/pyaerocom/io/gridded_reader.py @@ -4,7 +4,7 @@ from pyaerocom.griddeddata import GriddedData -class GriddedModelReader(abc.ABC): +class GriddedReader(abc.ABC): """Abstract base 
class for griddel model reader used for collocation""" @property diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index 3d2419cec..ebf8e81c9 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -12,7 +12,7 @@ from pyaerocom import const from pyaerocom.exceptions import VarNotAvailableError from pyaerocom.griddeddata import GriddedData -from pyaerocom.io.gridded_reader import GriddedModelReader +from pyaerocom.io.gridded_reader import GriddedReader from pyaerocom.units_helpers import UALIASES from .additional_variables import ( @@ -47,7 +47,7 @@ logger = logging.getLogger(__name__) -class ReadMscwCtm(GriddedModelReader): +class ReadMscwCtm(GriddedReader): """ Class for reading model output from the EMEP MSC-W chemical transport model. diff --git a/pyaerocom/io/readgridded.py b/pyaerocom/io/readgridded.py index e7bc67306..84773f9de 100755 --- a/pyaerocom/io/readgridded.py +++ b/pyaerocom/io/readgridded.py @@ -45,7 +45,7 @@ subtract_cubes, ) from pyaerocom.io.fileconventions import FileConventionRead -from pyaerocom.io.gridded_reader import GriddedModelReader +from pyaerocom.io.gridded_reader import GriddedReader from pyaerocom.io.helpers import add_file_to_log from pyaerocom.io.iris_io import concatenate_iris_cubes, load_cubes_custom from pyaerocom.metastandards import AerocomDataID @@ -55,7 +55,7 @@ logger = logging.getLogger(__name__) -class ReadGridded(GriddedModelReader): +class ReadGridded(GriddedReader): """Class for reading gridded files using AeroCom file conventions Attributes From 54bde5e02ceac70e3df08ade425e2eec54740e4f Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Tue, 23 Jul 2024 09:59:58 +0000 Subject: [PATCH 8/9] adding typing hints --- pyaerocom/io/mscw_ctm/reader.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index ebf8e81c9..8f80a3291 100755 --- 
a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -177,11 +177,11 @@ class ReadMscwCtm(GriddedReader): YEAR_PATTERN = r".*((?:19|20)\d\d).*" class _PrivateFields: - filename = None - filedata = None - filepaths = None - files = None - data_dir = None + filename: str | None = None + filedata: xr.Dataset | None = None + filepaths: list[str] | None = None + files: list[str] | None = None + data_dir: str | None = None def __init__(self, data_id=None, data_dir=None, **kwargs): # opened dataset (for performance boost), will be reset if data_dir is @@ -335,11 +335,11 @@ def _clean_filepaths(self, filepaths, yrs, ts_type): return [d for _, d in sorted(zip(found_yrs, clean_paths))] @property - def data_id(self): + def data_id(self) -> str | None: return self._data_id @property - def _data_dir(self): + def _data_dir(self) -> str | None: """ Directory containing netcdf files """ @@ -348,7 +348,7 @@ def _data_dir(self): return self._private.data_dir @_data_dir.setter - def _data_dir(self, val): + def _data_dir(self, val: str): if val is None: raise ValueError(f"Data dir {val} needs to be a dictionary or a file") if not os.path.isdir(val): @@ -359,7 +359,7 @@ def _data_dir(self, val): self._private.files = self._filepaths @property - def _filename(self): + def _filename(self) -> str | None: """ Name of latest netcdf file read """ @@ -378,22 +378,22 @@ def _filename(self, val): self._private.filedata = None @property - def _filepaths(self): + def _filepaths(self) -> list[str]: """ - Path to data file + Paths to data file """ if self._data_dir is None and self._filepaths is None: # pragma: no cover raise AttributeError("data_dir or filepaths needs to be set before accessing") return self._private.filepaths @_filepaths.setter - def _filepaths(self, value): + def _filepaths(self, value: list[str]): if not isinstance(value, list): # pragma: no cover raise ValueError("needs to be list of strings") self._private.filepaths = value @property - def 
_filedata(self): + def _filedata(self) -> xr.Dataset: """ Loaded netcdf file (:class:`xarray.Dataset`) """ From 01a158ac4c51b1ed68d273bf0f1327ba2dc40eb8 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Wed, 24 Jul 2024 08:11:57 +0000 Subject: [PATCH 9/9] merge fixes --- pyaerocom/io/mscw_ctm/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index f5fa5ed49..d4da15b04 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -12,8 +12,8 @@ from pyaerocom import const from pyaerocom.exceptions import VarNotAvailableError from pyaerocom.griddeddata import GriddedData -from pyaerocom.projection_information import ProjectionInformation from pyaerocom.io.gridded_reader import GriddedReader +from pyaerocom.projection_information import ProjectionInformation from pyaerocom.units_helpers import UALIASES from .additional_variables import ( @@ -763,7 +763,7 @@ def _read_var_from_file(self, var_name_aerocom, ts_type): data.time.attrs["long_name"] = "time" data.time.attrs["standard_name"] = "time" prefix = emep_var.split("_")[0] - data.attrs["units"] = self.preprocess_units(data.units, prefix) + data.attrs["units"] = self._preprocess_units(data.units, prefix) return data, proj_info @staticmethod