From 9bef6c9c90d9f875b67966514b3983ae2cc0cb37 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 10:37:49 -0600 Subject: [PATCH 01/11] Add `BaseDatasetIterable` class --- rex/resource.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/rex/resource.py b/rex/resource.py index c078ba0d..11d7fc1b 100644 --- a/rex/resource.py +++ b/rex/resource.py @@ -3,7 +3,7 @@ Classes to handle resource data """ import os -from abc import ABC +from abc import ABC, abstractmethod from warnings import warn import dateutil @@ -17,6 +17,18 @@ from rex.utilities.utilities import check_tz, get_lat_lon_cols +class BaseDatasetIterable(ABC): + """Base class for file that is iterable over datasets. """ + + @property + @abstractmethod + def datasets(self): + """iterable: Datasets available in file. """ + + def __iter__(self): + return iter(self.datasets) + + class ResourceDataset: """ h5py.Dataset wrapper for Resource .h5 files From c650f38fc308454864785d972eba909cbdc906c8 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 10:38:26 -0600 Subject: [PATCH 02/11] Resource uses base iterable + tests --- rex/resource.py | 16 +--------------- tests/test_resource.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/rex/resource.py b/rex/resource.py index 11d7fc1b..64b7ccf5 100644 --- a/rex/resource.py +++ b/rex/resource.py @@ -595,7 +595,7 @@ def extract(cls, ds, ds_slice, scale_attr='scale_factor', return dset[ds_slice] -class BaseResource(ABC): +class BaseResource(BaseDatasetIterable): """ Abstract Base class to handle resource .h5 files """ @@ -658,7 +658,6 @@ def __init__(self, h5_file, mode='r', unscale=True, str_decode=True, self._shapes = None self._chunks = None self._dtypes = None - self._i = 0 def __repr__(self): msg = "{} for {}".format(self.__class__.__name__, self.h5_file) @@ -703,19 +702,6 @@ def __getitem__(self, keys): return out - def __iter__(self): - return self - - def __next__(self): - if self._i >= len(self.datasets): - self._i = 0 - raise StopIteration - - dset = self.datasets[self._i] - self._i += 1 - - return dset - def __contains__(self, dset): return dset in self.datasets diff --git a/tests/test_resource.py b/tests/test_resource.py index 3c3c86c9..30620bbe 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -949,6 +949,19 @@ def test_1D_dataset_slicing_spatial_repeat(): assert res['dset3', 55, 79].dtype == np.float32 +@pytest.mark.timeout(10) +def test_resource_iterator(): + """ + test MultiH5 iterator. Incorrect implementation can cause an infinite loop + """ + h5_file = os.path.join(TESTDATADIR, 'nsrdb', 'nsrdb_irradiance_2018.h5') + with Resource(h5_file) as res: + dsets_permutation = {(a, b) for a in res for b in res} + num_dsets = len(res.datasets) + + assert len(dsets_permutation) == num_dsets ** 2 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From de809caa4f8da4bbc57dc43a5e32640d2ae2293c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 10:38:54 -0600 Subject: [PATCH 03/11] `MultiH5` uses `BaseDatasetIterable` + tests --- rex/multi_file_resource.py | 19 ++----------------- tests/test_resource.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/rex/multi_file_resource.py b/rex/multi_file_resource.py index 239144c9..91635b99 100644 --- a/rex/multi_file_resource.py +++ b/rex/multi_file_resource.py @@ -10,12 +10,12 @@ from rex.renewable_resource import (NSRDB, SolarResource, GeothermalResource, WindResource, WaveResource, AbstractInterpolatedResource) -from rex.resource import Resource +from rex.resource import Resource, BaseDatasetIterable from rex.utilities.exceptions import FileInputError, ResourceRuntimeError from rex.utilities.utilities import unstupify_path -class MultiH5: +class MultiH5(BaseDatasetIterable): """ Class to handle multiple h5 file Resources """ @@ -32,8 +32,6 @@ def __init__(self, h5_files, check_files=False): self._dset_map = self._map_file_dsets(h5_files) self._h5_map = self._map_file_instances(set(self._dset_map.values())) - self._i = 0 - if check_files: self._preflight_check() @@ -66,19 +64,6 @@ def __getitem__(self, dset): return ds - def __next__(self): - if self._i >= len(self.datasets): - self._i = 0 - raise StopIteration - - dset = self.datasets[self._i] - self._i += 1 - - return dset - - def __iter__(self): - return self - def __contains__(self, dset): return dset in self.datasets diff --git a/tests/test_resource.py b/tests/test_resource.py index 30620bbe..b4983b0d 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -962,6 +962,19 @@ def test_resource_iterator(): assert len(dsets_permutation) == num_dsets ** 2 +@pytest.mark.timeout(10) +def test_mh5_iterator(): + """ + test MultiH5 iterator. Incorrect implementation can cause an infinite loop + """ + h5_files = [os.path.join(TESTDATADIR, 'nsrdb', 'nsrdb_irradiance_2018.h5'), + os.path.join(TESTDATADIR, 'wtk', 'wtk_2010_100m.h5')] + + mh5 = MultiH5(h5_files) + dsets_permutation = {(a, b) for a in mh5 for b in mh5} + assert len(dsets_permutation) == len(mh5.datasets) ** 2 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From 2192365b072782dff9570affc0cb58978dabb3b2 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 10:39:11 -0600 Subject: [PATCH 04/11] Fix docstring --- tests/test_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_resource.py b/tests/test_resource.py index b4983b0d..37fe759a 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -952,7 +952,7 @@ def test_1D_dataset_slicing_spatial_repeat(): @pytest.mark.timeout(10) def test_resource_iterator(): """ - test MultiH5 iterator. Incorrect implementation can cause an infinite loop + test Resource iterator. Incorrect implementation can cause an infinite loop """ h5_file = os.path.join(TESTDATADIR, 'nsrdb', 'nsrdb_irradiance_2018.h5') with Resource(h5_file) as res: From 3b9f23e60eed2db752f8a7c708bc6b15ad33f36f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 10:39:49 -0600 Subject: [PATCH 05/11] `ResourceX` uses `BaseDatasetIterable` + tests --- rex/resource_extraction/resource_extraction.py | 18 ++---------------- tests/test_resource_extraction.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/rex/resource_extraction/resource_extraction.py b/rex/resource_extraction/resource_extraction.py index 223bdeea..080c0c22 100644 --- a/rex/resource_extraction/resource_extraction.py +++ b/rex/resource_extraction/resource_extraction.py @@ -27,7 +27,7 @@ WaveResource, WindResource, ) -from rex.resource import Resource, ResourceDataset +from rex.resource import Resource, ResourceDataset, BaseDatasetIterable from rex.temporal_stats.temporal_stats import TemporalStats from rex.utilities.exceptions import ResourceValueError, ResourceWarning from rex.utilities.execution import SpawnProcessPool @@ -39,7 +39,7 @@ logger = logging.getLogger(__name__) -class ResourceX: +class ResourceX(BaseDatasetIterable): """ Resource data extraction tool """ @@ -88,7 +88,6 @@ def __init__(self, res_h5, res_cls=None, tree=None, unscale=True, group=group, hsds=hsds, hsds_kwargs=hsds_kwargs) self._dist_thresh = None self._tree = tree - self._i = 0 def __repr__(self): msg = "{} extractor for {}".format(self._res.__class__.__name__, @@ -114,19 +113,6 @@ def __getitem__(self, keys): def __contains__(self, dset): return dset in self.datasets - def __iter__(self): - return self - - def __next__(self): - if self._i >= len(self.datasets): - self._i = 0 - raise StopIteration - - dset = self.datasets[self._i] - self._i += 1 - - return dset - @property def resource(self): """ diff --git a/tests/test_resource_extraction.py b/tests/test_resource_extraction.py index 20393a1f..94a17111 100644 --- a/tests/test_resource_extraction.py +++ b/tests/test_resource_extraction.py @@ -1130,6 +1130,19 @@ def test_get_bad_raster_index(): ext.get_raster_index(target, shape, meta=meta) +@pytest.mark.timeout(10) +def test_resourcex_iterable(NSRDBX_cls): + """ + test ResourceX iterator. Incorrect implementation can cause an + infinite loop + """ + with NSRDBX_cls as res: + dsets_permutation = {(a, b) for a in res for b in res} + num_dsets = len(res.datasets) + + assert len(dsets_permutation) == num_dsets ** 2 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From 6d4adcf43f9706fa0ec47f6fcb53811e08f8bf49 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 10:50:26 -0600 Subject: [PATCH 06/11] Multi res now creates iter + tests --- rex/multi_res_resource.py | 12 +----------- tests/test_multi_res_resource.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/rex/multi_res_resource.py b/rex/multi_res_resource.py index 75272b4d..1ae342f1 100644 --- a/rex/multi_res_resource.py +++ b/rex/multi_res_resource.py @@ -237,17 +237,7 @@ def __getitem__(self, keys): return out def __iter__(self): - return self - - def __next__(self): - if self._i >= len(self.datasets): - self._i = 0 - raise StopIteration - - dset = self.datasets[self._i] - self._i += 1 - - return dset + return iter(self.datasets) def __contains__(self, dset): return dset in self.datasets diff --git a/tests/test_multi_res_resource.py b/tests/test_multi_res_resource.py index 0ca3438a..c02d74a3 100644 --- a/tests/test_multi_res_resource.py +++ b/tests/test_multi_res_resource.py @@ -170,3 +170,18 @@ def test_preload_sam(): assert np.allclose(true, test) mrr.close() + + +@pytest.mark.timeout(10) +def test_multi_res_resource_iterator(): + """ + test MultiResolutionResource iterator. Incorrect implementation can + cause an infinite loop + """ + with tempfile.TemporaryDirectory() as td: + fp_hr, fp_lr = make_multi_res_files(td) + mrr = MultiResolutionResource(fp_hr, fp_lr, handler_class=WindResource) + dsets_permutation = {(a, b) for a in mrr for b in mrr} + num_dsets = len(mrr.datasets) + + assert len(dsets_permutation) == num_dsets ** 2 From bc8f5f31fef91b0259d32f18beca60f83e33ad09 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 11:00:45 -0600 Subject: [PATCH 07/11] `MultiTimeResource` now uses `BaseDatasetIterable` + tests --- rex/multi_time_resource.py | 18 ++---------------- tests/test_multi_time_resource.py | 19 +++++++++++++++++-- tests/test_multi_year_resource.py | 15 +++++++++++++++ 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/rex/multi_time_resource.py b/rex/multi_time_resource.py index 8b679070..84d599a2 100644 --- a/rex/multi_time_resource.py +++ b/rex/multi_time_resource.py @@ -15,7 +15,7 @@ WaveResource, WindResource, ) -from rex.resource import Resource +from rex.resource import Resource, BaseDatasetIterable from rex.utilities.exceptions import FileInputError from rex.utilities.parse_keys import parse_keys, parse_slice @@ -419,7 +419,7 @@ def close(self): f.close() -class MultiTimeResource: +class MultiTimeResource(BaseDatasetIterable): """ Class to handle resource data stored temporally accross multiple .h5 files @@ -519,7 +519,6 @@ def __init__(self, h5_path, unscale=True, str_decode=True, self._h5 = MultiTimeH5(self.h5_path, res_cls=res_cls, **cls_kwargs) self.h5_files = self._h5.h5_files self.h5_file = self.h5_files[0] - self._i = 0 def __repr__(self): msg = "{} for {}".format(self.__class__.__name__, self.h5_path) @@ -537,19 +536,6 @@ def __exit__(self, type, value, traceback): def __len__(self): return len(self.h5.time_index) - def __iter__(self): - return self - - def __next__(self): - if self._i >= len(self.datasets): - self._i = 0 - raise StopIteration - - dset = self.datasets[self._i] - self._i += 1 - - return dset - def __getitem__(self, keys): ds, ds_slice = parse_keys(keys) diff --git a/tests/test_multi_time_resource.py b/tests/test_multi_time_resource.py index acca2233..12944bf1 100644 --- a/tests/test_multi_time_resource.py +++ b/tests/test_multi_time_resource.py @@ -9,8 +9,8 @@ import pytest from rex import TESTDATADIR -from rex.multi_time_resource import (MultiTimeH5, MultiTimeNSRDB, - MultiTimeWindResource) +from rex.multi_time_resource import (MultiTimeH5, MultiTimeResource, + MultiTimeNSRDB, MultiTimeWindResource) from rex.resource import Resource @@ -323,6 +323,21 @@ def test_map_hsds_files(): assert not any(wrong), 'Wrong files: {}'.format(wrong) +@pytest.mark.timeout(10) +def test_mt_iterator(): + """ + test MultiTimeResource iterator. Incorrect implementation can + cause an infinite loop + """ + path = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_*.h5') + + with MultiTimeResource(path) as res: + dsets_permutation = {(a, b) for a in res for b in res} + num_dsets = len(res.datasets) + + assert len(dsets_permutation) == num_dsets ** 2 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. diff --git a/tests/test_multi_year_resource.py b/tests/test_multi_year_resource.py index 60df156a..029fd989 100644 --- a/tests/test_multi_year_resource.py +++ b/tests/test_multi_year_resource.py @@ -358,6 +358,21 @@ def test_multi_file_year(): assert test_meta.equals(f.meta) +@pytest.mark.timeout(10) +def test_my_resource_iterator(): + """ + test MultiYearWindResource iterator. Incorrect implementation can + cause an infinite loop + """ + path = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_*.h5') + + with MultiYearWindResource(path) as res: + dsets_permutation = {(a, b) for a in res for b in res} + num_dsets = len(res.datasets) + + assert len(dsets_permutation) == num_dsets ** 2 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From 92e4632f119deb6f8a4abe72cf1275506f6ebaa8 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 11:05:50 -0600 Subject: [PATCH 08/11] Fix iterator in `MultiYearH5` + test --- rex/multi_year_resource.py | 14 +------------- tests/test_multi_year_resource.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/rex/multi_year_resource.py b/rex/multi_year_resource.py index 21ba59d1..b6b04f1f 100644 --- a/rex/multi_year_resource.py +++ b/rex/multi_year_resource.py @@ -58,7 +58,6 @@ def __init__(self, h5_path, years=None, res_cls=Resource, hsds=False, self._datasets = None self._shape = None self._time_index = None - self._i = 0 def __repr__(self): msg = ("{} for {}:\n Contains data for {} years" @@ -82,17 +81,7 @@ def __getitem__(self, year): return h5 def __iter__(self): - return self - - def __next__(self): - if self._i >= len(self.years): - self._i = 0 - raise StopIteration - - year = self.years[self._i] - self._i += 1 - - return year + return iter(self.years) def __contains__(self, year): return year in self.years @@ -451,7 +440,6 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True, **cls_kwargs) self.h5_files = self._h5.h5_files self.h5_file = self.h5_files[0] - self._i = 0 @property def years(self): diff --git a/tests/test_multi_year_resource.py b/tests/test_multi_year_resource.py index 029fd989..0376a846 100644 --- a/tests/test_multi_year_resource.py +++ b/tests/test_multi_year_resource.py @@ -373,6 +373,21 @@ def test_my_resource_iterator(): assert len(dsets_permutation) == num_dsets ** 2 +@pytest.mark.timeout(10) +def test_myh5_iterator(): + """ + test MultiYearH5 iterator. Incorrect implementation can + cause an infinite loop + """ + path = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_*.h5') + + myh5 = MultiYearH5(path) + dsets_permutation = {(a, b) for a in myh5 for b in myh5} + num_dsets = len(myh5.years) + + assert len(dsets_permutation) == num_dsets ** 2 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From 9a38939f3902e6d445da9a932469a84950a87ef1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 11:06:57 -0600 Subject: [PATCH 09/11] Remove various `self._i` --- rex/multi_file_resource.py | 1 - rex/multi_res_resource.py | 1 - rex/multi_time_resource.py | 1 - rex/resource_extraction/resource_extraction.py | 3 --- 4 files changed, 6 deletions(-) diff --git a/rex/multi_file_resource.py b/rex/multi_file_resource.py index 91635b99..613fa1a0 100644 --- a/rex/multi_file_resource.py +++ b/rex/multi_file_resource.py @@ -390,7 +390,6 @@ def __init__(self, h5_source, unscale=True, str_decode=True, self._shapes = None self._chunks = None self._dtypes = None - self._i = 0 self._interp_var = None self._use_lapse = use_lapse_rate diff --git a/rex/multi_res_resource.py b/rex/multi_res_resource.py index 1ae342f1..1e55fd4e 100644 --- a/rex/multi_res_resource.py +++ b/rex/multi_res_resource.py @@ -70,7 +70,6 @@ def __init__(self, h5_hr, h5_lr, handler_class=Resource, self._lr_res = handler_class(h5_lr, **handle_kwargs) self._nn_map = nn_map self._nn_d = nn_d - self._i = 0 if self._nn_map is None: self._nn_d, self._nn_map = self.make_nn_map(self._hr_res, diff --git a/rex/multi_time_resource.py b/rex/multi_time_resource.py index 84d599a2..2fcaae73 100644 --- a/rex/multi_time_resource.py +++ b/rex/multi_time_resource.py @@ -58,7 +58,6 @@ def __init__(self, h5_path, res_cls=Resource, hsds=False, hsds_kwargs=None, self._shape = None self._time_index = None self._time_slice_map = [] - self._i = 0 def __repr__(self): msg = ("{} for {}:\n Contains data from {} files" diff --git a/rex/resource_extraction/resource_extraction.py b/rex/resource_extraction/resource_extraction.py index 080c0c22..d28939e9 100644 --- a/rex/resource_extraction/resource_extraction.py +++ b/rex/resource_extraction/resource_extraction.py @@ -1529,7 +1529,6 @@ def __init__(self, resource_path, res_cls=None, tree=None, str_decode=str_decode, check_files=check_files) self._dist_thresh = None self._tree = tree - self._i = 0 class MultiYearResourceX(ResourceX): @@ -1576,7 +1575,6 @@ def __init__(self, resource_path, years=None, tree=None, unscale=True, hsds_kwargs=hsds_kwargs) self._dist_thresh = None self._tree = tree - self._i = 0 def get_means_map(self, ds_name, year=None, region=None, region_col='state', max_workers=None, @@ -1662,7 +1660,6 @@ def __init__(self, resource_path, tree=None, unscale=True, hsds=hsds, hsds_kwargs=hsds_kwargs) self._dist_thresh = None self._tree = tree - self._i = 0 class SolarX(ResourceX): From db728dda7619701b5bdb7869f72b3d04d51a67d8 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 11:07:07 -0600 Subject: [PATCH 10/11] Add `pytest-timeout` --- .github/workflows/pull_request_tests.yml | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request_tests.yml b/.github/workflows/pull_request_tests.yml index cbb8f29d..56517637 100644 --- a/.github/workflows/pull_request_tests.yml +++ b/.github/workflows/pull_request_tests.yml @@ -28,6 +28,7 @@ jobs: pip install --upgrade pip pip install pytest pip install pytest-cov + pip install pytest-timeout pip install -e . - name: Run pytest and Generate coverage report run: | diff --git a/setup.py b/setup.py index 93f15176..4dc10e80 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def run(self): with open("requirements.txt") as f: install_requires = f.readlines() -test_requires = ["pytest>=5.2", ] +test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1"] dev_requires = ["flake8", "pre-commit", "pylint", "hsds>=0.8.4"] description = ("National Renewable Energy Laboratory's (NREL's) REsource " "eXtraction tool: rex") From 9d952d7e53935b762adc4d9a5f15b04eed8d11d8 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 14 Aug 2024 11:20:25 -0600 Subject: [PATCH 11/11] Close file for windows processes --- tests/test_multi_res_resource.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_multi_res_resource.py b/tests/test_multi_res_resource.py index c02d74a3..018d8285 100644 --- a/tests/test_multi_res_resource.py +++ b/tests/test_multi_res_resource.py @@ -184,4 +184,6 @@ def test_multi_res_resource_iterator(): dsets_permutation = {(a, b) for a in mrr for b in mrr} num_dsets = len(mrr.datasets) + mrr.close() + assert len(dsets_permutation) == num_dsets ** 2