Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Doctest Optimizations #53

Merged
merged 16 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/CI-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -38,4 +38,6 @@ jobs:
- name: Test with pytest
run: |
pytest -v --cov
pytest -v --doctest-modules src/
- name: Test with doctest
run: |
PYTHONPATH=src pytest -v --doctest-modules src/
43 changes: 43 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import re
from pathlib import Path

import pytest

from tests.utils.constants import TEST_ROOT

# Modules that pytest imports as plugins when it loads this conftest.
# NOTE(review): judging by their dotted paths these presumably provide the
# shared test fixtures; the modules themselves are not visible here -- confirm.
pytest_plugins = [
"tests.fixtures.configs",
"tests.fixtures.environment",
"tests.fixtures.fake_filesystem",
"tests.fixtures.sample_rules",
"tests.fixtures.config_files",
"tests.fixtures.CV_Dir",
"tests.fixtures.CMIP_Tables_Dir",
]


@pytest.hookimpl(tryfirst=True)
def pytest_collection_modifyitems(config, items):
    """Add the ``doctest`` marker to every collected item from a ``.py`` file.

    Parameters
    ----------
    config
        The pytest config object (unused here).
    items
        The collected test items; matching items are marked in place.
    """
    # Items without a filesystem path, or with a non-``.py`` extension,
    # are left untouched.
    python_items = (
        entry for entry in items if entry.fspath and entry.fspath.ext == ".py"
    )
    for entry in python_items:
        entry.add_marker(pytest.mark.doctest)


@pytest.fixture(autouse=True)
def pathlib_doctest_directive(doctest_namespace):
    """Inject ``Path`` and a ``path_replace`` helper into the doctest namespace.

    ``path_replace`` rewrites ``PosixPath(...)`` / ``WindowsPath(...)`` reprs in
    a string to the generic ``Path(...)`` form, so doctests can compare path
    output independently of the platform.
    """
    platform_path_repr = r"(PosixPath|WindowsPath)\((.*?)\)"

    def path_replace(output):
        """Replace platform-specific Path output with generic Path in doc-tests."""
        generic_repr = re.sub(platform_path_repr, r"Path(\2)", output)
        return generic_repr

    doctest_namespace["Path"] = Path
    doctest_namespace["path_replace"] = path_replace


def pytest_unconfigure(config):
    """Delete leftover pipeline JSON files from the current working directory.

    Any directory entry whose name contains ``pipeline`` and ends with
    ``.json`` is removed.

    Parameters
    ----------
    config
        The pytest config object (unused here).
    """
    leftovers = [
        name
        for name in os.listdir()
        if "pipeline" in name and name.endswith(".json")
    ]
    for name in leftovers:
        os.remove(name)
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def read(filename):
"deprecation",
"distributed",
"dpath",
"flexparser < 0.4", # NOTE(PG): See https://tinyurl.com/ypf99xnh
"flox",
"imohash",
"numbagg",
Expand Down
19 changes: 2 additions & 17 deletions src/pymorize/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@
>>> year_bounds = year_bounds_major_digits(2000, 2010, 2, 2)
>>> print(year_bounds)
[[2000, 2001], [2002, 2003], [2004, 2005], [2006, 2007], [2008, 2009], [2010, 2010]]

>>> date_range = date_ranges_from_bounds(year_bounds, freq="Y")
>>> print(date_range)
([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')])

>>> date_range = date_ranges_from_year_bounds(year_bounds, freq="Y")
>>> print(date_range)
([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')])
"""

import pendulum
Expand Down Expand Up @@ -157,16 +149,9 @@ def date_ranges_from_bounds(bounds, freq: str = "M", **kwargs):

Examples
--------
>>> bounds = [("2020-01-01", "2020-01-31"), ("2020-02-01", "2020-02-29")]
>>> date_ranges = date_ranges_from_bounds(bounds)
>>> print(date_ranges)
(DatetimeIndex(['2020-01-01', '2020-01-02', ..., '2020-01-31'], dtype='datetime64[ns]', freq='D'),
DatetimeIndex(['2020-02-01', '2020-02-02', ..., '2020-02-29'], dtype='datetime64[ns]', freq='D'))

>>> bounds = [("2020-01-01", "2020-12-31")]
>>> date_ranges = date_ranges_from_bounds(bounds, freq="M")
>>> print(date_ranges)
(DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='M'),)
>>> date_ranges_from_bounds(bounds, freq="M")
DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='ME')
"""
objs = []
for start, end in bounds:
Expand Down
37 changes: 30 additions & 7 deletions src/pymorize/filecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,31 @@

.. code-block:: python

>>> filepath = "/pool/data/CO2f_fesom_1850-01-01_1900-01-01.nc"
>>> filepath = "tests/data/test_experiments/my_expid/outdata/fesom/volo.nc"
>>> cache.add_file(filepath)
>>> # adding multiple files at once
>>> cache.add_files(["/path/to/file1.nc", "/path/to/file2.nc"])
>>> cache.add_files(["tests/data/dummy_data/random1.nc", "tests/data/dummy_data/random2.nc"])

You can access the metadata of a file in the cache using the `get` method:

.. code-block:: python

>>> filepath = "tests/data/test_experiments/my_expid/outdata/fesom/volo.nc"
>>> # an alternative way of adding a file to the cache and getting its metadata is by using the `get` method
>>> cache.get("filepath")
>>> cache.get(filepath)
filepath tests/data/test_experiments/my_expid/outdata/f...
filename volo.nc
checksum imohash:c8047bbd7e292dbe54a6387611f500c4
filesize 584
mtime ...
start 1951-01-02 00:00:00
end 1951-01-13 00:00:00
timespan 11 days, 0:00:00
freq D
steps 12
variable volo
units m3
Name: 0, dtype: object


For an overview of the cached data, use the `summary` method. This method returns a
Expand All @@ -55,6 +74,14 @@
.. code-block:: python

>>> cache.summary()
variable seq volo
freq D D
start 0001-01-01 00:00:00 1951-01-02 00:00:00
end 0001-01-11 00:00:00 1951-01-13 00:00:00
timespan 10 days 00:00:00 11 days 00:00:00
nfiles 2 1
steps 11 12
size 2120 584

To use a subset of the collection for a given variable, use `select_range`
method. This will limit the files in the cache to those that are within the
Expand Down Expand Up @@ -537,7 +564,3 @@ def register_cache(ds):
filename = ds.encoding["source"]
fc.add_file(filename)
return ds


datapath = "/work/ba1103/a270073/out/awicm-1.0-recom/awi-esm-1-1-lr_kh800/piControl/outdata/fesom"
# filepat = "CO2f_fesom_*nc"
4 changes: 2 additions & 2 deletions src/pymorize/frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

>>> freq = Frequency("day", 1.0)
>>> print(freq.name)
'day'
day
>>> print(freq.approx_interval)
1.0
>>> print(freq.time_method)
Expand All @@ -28,7 +28,7 @@

>>> freq = Frequency.for_name("day")
>>> print(freq.name)
'day'
day
"""

from enum import Enum
Expand Down
66 changes: 7 additions & 59 deletions src/pymorize/gather_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ def _input_pattern_from_env(config: dict) -> re.Pattern:
re.compile('.*')
>>> bool(pattern.match('test'))
True
>>> pattern = _input_pattern_from_env(config_only_env_name)
>>> os.environ["CMOR_PATTERN"] = "test*nc"
>>> pattern = _input_pattern_from_env(config_only_env_name)
>>> pattern
re.compile('test*nc')
>>> bool(pattern.match('test'))
True
False
>>> del os.environ["CMOR_PATTERN"]
>>> pattern = _input_pattern_from_env(config_only_env_value)
>>> pattern
re.compile('.*')
Expand Down Expand Up @@ -154,8 +155,9 @@ def _resolve_symlinks(files: List[pathlib.Path]) -> List[pathlib.Path]:
--------
>>> from pathlib import Path
>>> files = [Path('/path/to/file1'), Path('/path/to/file2')]
>>> _resolve_symlinks(files)
[Path('/path/to/file1'), Path('/path/to/file2')]
>>> paths = _resolve_symlinks(files)
>>> [str(p) for p in paths] # Convert to strings for doctest
['/path/to/file1', '/path/to/file2']
"""
if not all(isinstance(f, pathlib.Path) for f in files):
logger.error("All files must be pathlib.Path objects. Got the following:")
Expand Down Expand Up @@ -296,62 +298,8 @@ def gather_inputs(config: dict) -> dict:
config:
The configuration dictionary with the input files added.


Examples
--------
Assuming a filesystem with::

/path/to/input/files/test2000.nc
/path/to/input/files/test2001.nc
/path/to/input/files/test2002.nc
/path/to/input/files/test2003.nc
/path/to/input/files/test2004.nc
/path/to/input/files/test2005.nc
/path/to/input/files/test2006.nc
/path/to/input/files/test2007.nc
/path/to/input/files/test2008.nc
/path/to/input/files/test2009.nc
/path/to/input/files/test2010.nc

>>> config = {
... "rules": [
... {
... "input_patterns": [
... "/path/to/input/files/test*nc"
... ],
... "year_start": 2000,
... "year_end": 2010
... }
... ]
... }
>>> gather_inputs(config)
{
"rules": [
{
"input_patterns": [
"/path/to/input/files/test*nc"
],
"year_start": 2000,
"year_end": 2010,
"input_files": {
"/path/to/input/files/test*nc": [
"/path/to/input/files/test2000.nc",
"/path/to/input/files/test2001.nc",
"/path/to/input/files/test2002.nc",
"/path/to/input/files/test2003.nc",
"/path/to/input/files/test2004.nc",
"/path/to/input/files/test2005.nc",
"/path/to/input/files/test2006.nc",
"/path/to/input/files/test2007.nc",
"/path/to/input/files/test2008.nc",
"/path/to/input/files/test2009.nc",
"/path/to/input/files/test2010.nc"
],
}
}
]
}
"""
# NOTE(PG): Example removed from docstring as it is scheduled for deprecation.
rules = config.get("rules", [])
for rule in rules:
input_patterns = rule.get("input_patterns", [])
Expand Down
21 changes: 0 additions & 21 deletions tests/conftest.py

This file was deleted.

Binary file added tests/data/dummy_data/random1.nc
Binary file not shown.
Binary file added tests/data/dummy_data/random2.nc
Binary file not shown.