Skip to content

Commit

Permalink
Merge pull request #66 from esm-tools/fix/ci-segfault
Browse files Browse the repository at this point in the history
Segfault in CI
  • Loading branch information
pgierz authored Nov 21, 2024
2 parents ebf53e7 + 32558dc commit 59f8681
Show file tree
Hide file tree
Showing 14 changed files with 458 additions and 31 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/CI-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Set up NetCDF4 with HDF5 support
run: |
sudo apt-get update
sudo apt-get install -y libnetcdf-dev libhdf5-dev
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand All @@ -37,12 +41,17 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test if data will work (Meta-Test)
run: |
export HDF5_DEBUG=1
export NETCDF_DEBUG=1
export XARRAY_BACKEND=h5netcdf
pytest -vvv -s --cov tests/meta/*.py
- name: Test with pytest (Unit)
run: |
export XARRAY_BACKEND=h5netcdf
pytest -vvv -s --cov tests/unit/*.py
- name: Test with pytest (Integration)
run: |
export XARRAY_BACKEND=h5netcdf
pytest -vvv -s --cov tests/integration/*.py
- name: Test with doctest
run: |
Expand Down
2 changes: 2 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"sphinx_jinja",
"sphinx.ext.intersphinx",
"cerberus_sphinx_ext",
"everett.sphinxext",
"sphinx_click",
]

Expand All @@ -85,6 +86,7 @@
"xarray": ("http://xarray.pydata.org/en/stable/", None),
"chemicals": ("https://chemicals.readthedocs.io/", None),
"cerberus": ("https://docs.python-cerberus.org/", None),
"everett": ("https://everett.readthedocs.io/", None),
}


Expand Down
1 change: 1 addition & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Contents
standard_library
including_custom_steps
including_subcommand_plugins
pymorize_configuration
developer_guide
API

Expand Down
4 changes: 4 additions & 0 deletions doc/pymorize_configuration.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
==========================
``pymorize`` Configuration
==========================
.. automodule:: pymorize.config
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ def read(filename):
"deprecation",
"distributed",
"dpath",
"everett[yaml]",
"flexparser < 0.4", # NOTE(PG): See https://tinyurl.com/ypf99xnh
"flox",
"h5netcdf",
"imohash",
"netcdf4",
"netcdf4", # NOTE(PG): Shouldn't be a prereq for xarray?
"numbagg",
"pendulum",
"pint-xarray",
Expand Down
72 changes: 60 additions & 12 deletions src/pymorize/cmorizer.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from pathlib import Path

import dask
import dask # noqa: F401
import pandas as pd
import questionary
import yaml
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from everett.manager import generate_uppercase_key, get_runtime_config
from prefect import flow, task
from prefect.futures import wait
from prefect.logging import get_run_logger
from prefect_dask import DaskTaskRunner
from rich.progress import track

from .config import PymorizeConfig, PymorizeConfigManager, parse_bool
from .data_request import (DataRequest, DataRequestTable, DataRequestVariable,
IgnoreTableFiles)
from .filecache import fc
Expand All @@ -34,16 +34,52 @@ def __init__(
inherit_cfg=None,
**kwargs,
):
################################################################################
self._general_cfg = general_cfg or {}
self._pymorize_cfg = pymorize_cfg or {}
self._pymorize_cfg = PymorizeConfigManager.from_pymorize_cfg(pymorize_cfg or {})
self._dask_cfg = dask_cfg or {}
self._inherit_cfg = inherit_cfg or {}
self.rules = rules_cfg or []
self.pipelines = pipelines_cfg or []

self._cluster = None # Dask Cluster, might be set up later
if self._pymorize_cfg.get("parallel", True):
if pymorize_cfg.get("parallel_backend") == "dask":
self._cluster = None # ask Cluster, might be set up later
################################################################################

################################################################################
# Print Out Configuration:
logger.debug(80 * "#")
logger.debug("---------------------")
logger.debug("General Configuration")
logger.debug("---------------------")
logger.debug(yaml.dump(self._general_cfg))
logger.debug("-----------------------")
logger.debug("Pymorize Configuration:")
logger.debug("-----------------------")
# This isn't actually the config, it's the "App" object. Everett is weird about this...
pymorize_config = PymorizeConfig()
# NOTE(PG): This variable is for demonstration purposes:
_pymorize_config_dict = {}
for namespace, key, value, option in get_runtime_config(
self._pymorize_cfg, pymorize_config
):
full_key = generate_uppercase_key(key, namespace)
_pymorize_config_dict[full_key] = value
logger.info(yaml.dump(_pymorize_config_dict))
# Avoid confusion:
del pymorize_config
logger.info(80 * "#")
################################################################################

################################################################################
# NOTE(PG): Curious about the configuration? Add a breakpoint here and print
# out the variable _pymorize_config_dict to see EVERYTHING that is
# available to you in the configuration.
# breakpoint()
################################################################################

################################################################################
# Post_Init:
if self._pymorize_cfg("parallel"):
if self._pymorize_cfg("parallel_backend") == "dask":
self._post_init_configure_dask()
self._post_init_create_dask_cluster()
self._post_init_create_pipelines()
Expand All @@ -52,6 +88,7 @@ def __init__(
self._post_init_create_data_request()
self._post_init_populate_rules_with_tables()
self._post_init_data_request_variables()
################################################################################

def _post_init_configure_dask(self):
"""
Expand All @@ -61,8 +98,9 @@ def _post_init_configure_dask(self):
--------
https://docs.dask.org/en/stable/configuration.html?highlight=config#directly-within-python
"""
import dask.distributed # Needed to pre-populate config, noqa: F401
import dask_jobqueue # Needed to pre-populate config, noqa: F401
# Needed to pre-populate config
import dask.distributed # noqa: F401
import dask_jobqueue # noqa: F401

logger.info("Updating Dask configuration. Changed values will be:")
logger.info(yaml.dump(self._dask_cfg))
Expand Down Expand Up @@ -173,7 +211,11 @@ def find_matching_rule(
else:
logger.critical(msg)
logger.critical(
"This should lead to a program crash! Exception due to >> pymorize_cfg['raise_on_multiple_rules'] = False <<"
"""
This should lead to a program crash! Exception due to:
>> pymorize_cfg['raise_on_multiple_rules'] = False <<
"""
)
logger.warning("Returning the first match.")
return matches[0]
Expand Down Expand Up @@ -213,6 +255,11 @@ def _post_init_create_pipelines(self):
def _post_init_create_rules(self):
self.rules = [Rule.from_dict(p) for p in self.rules if not isinstance(p, Rule)]
self._post_init_inherit_rules()
self._post_init_attach_pymorize_config_rules()

def _post_init_attach_pymorize_config_rules(self):
for rule in self.rules:
rule._pymorize_cfg = self._pymorize_cfg

def _post_init_inherit_rules(self):
for rule_attr, rule_value in self._inherit_cfg.items():
Expand Down Expand Up @@ -250,7 +297,7 @@ def _check_is_subperiod(self):
if not is_subperiod:
errors.append(
ValueError(
f"Frequency in source file {data_freq} is not a subperiod of frequency in table {table_freq}."
f"Freq in source file {data_freq} is not a subperiod of freq in table {table_freq}."
),
)
logger.info(
Expand All @@ -276,6 +323,7 @@ def from_dict(cls, data):
for rule in data.get("rules", []):
rule_obj = Rule.from_dict(rule)
instance.add_rule(rule_obj)
instance._post_init_attach_pymorize_config_rules()
instance._post_init_inherit_rules()
if "pipelines" in data:
if not PIPELINES_VALIDATOR.validate({"pipelines": data["pipelines"]}):
Expand Down
Loading

0 comments on commit 59f8681

Please sign in to comment.