Skip to content

Commit

Permalink
refactor(datafile): use len(obj) rather than obj.get_nrecords() (#2215)
Browse files Browse the repository at this point in the history
This PR has a few aims related to data files, including FormattedHeadFile, HeadFile and CellBudgetFile.

These files have a "number of records" property that was implemented with get_nrecords(). This "length of the object" measure is more naturally done with __len__, i.e. len(headsfile).

It is advised to prefer the len() approach, so instances of get_nrecords() for these files show a DeprecationWarning.

The CellBudgetFile also has a .nrecords property. It is also advised to show a DeprecationWarning with this property.

This PR also fixes a bug with get_nrecords(), recordarray is a structured array (np.ndarray), not a record array, so this always silently returns 0. This bug does not apply to CellBudgetFile, which worked fine and matches obj.nrecords.

Fixing this bug caused a test to fail, since a for-loop was never activated. A "todo" note is added since the reversed header is not the same as the original header.

Another aim of this PR is to re-organize a few CellBudgetFile tests from test_binaryfile.py to test_cellbudgetfile.py. Most of this is copied with perhaps minor simplifications.

Note that none of these changes apply to flopy.utils.swroutputfile.SwrBudget.get_nrecords(), which returns a tuple.
  • Loading branch information
mwtoews authored Jun 11, 2024
1 parent f15caaa commit e2d16df
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 95 deletions.
107 changes: 27 additions & 80 deletions autotest/test_binaryfile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""Test flopy.utils.binaryfile module.
See also test_cellbudgetfile.py for similar tests.
"""

from itertools import repeat

import numpy as np
Expand All @@ -8,7 +13,6 @@
from modflow_devtools.markers import requires_exe

import flopy
from autotest.conftest import get_example_data_path
from flopy.modflow import Modflow
from flopy.utils import (
BinaryHeader,
Expand Down Expand Up @@ -255,18 +259,6 @@ def test_binaryfile_writeread(function_tmpdir, nwt_model_path):
assert np.allclose(b, br), errmsg


def test_load_cell_budget_file_timeseries(example_data_path):
cbf = CellBudgetFile(
example_data_path / "mf2005_test" / "swiex1.gitzta",
precision="single",
)
ts = cbf.get_ts(text="ZETASRF 1", idx=(0, 0, 24))
assert ts.shape == (
4,
2,
), f"shape of zeta timeseries is {ts.shape} not (4, 2)"


def test_load_binary_head_file(example_data_path):
mpath = example_data_path / "freyberg"
hf = HeadFile(mpath / "freyberg.githds")
Expand Down Expand Up @@ -315,9 +307,15 @@ def test_headu_file_data(function_tmpdir, example_data_path):
@pytest.mark.slow
def test_headufile_get_ts(example_data_path):
heads = HeadUFile(example_data_path / "unstructured" / "headu.githds")
nnodes = 19479

# check number of records (headers)
assert len(heads) == 15
with pytest.deprecated_call():
assert heads.get_nrecords() == 15
assert not hasattr(heads, "nrecords")

# make sure timeseries can be retrieved for each node
nnodes = 19479
for i in range(0, nnodes, 100):
heads.get_ts(idx=i)
with pytest.raises(IndexError):
Expand All @@ -334,6 +332,7 @@ def test_headufile_get_ts(example_data_path):
/ "output"
/ "flow.hds"
)
assert len(heads) == 1
nnodes = 121
for i in range(nnodes):
heads.get_ts(idx=i)
Expand Down Expand Up @@ -361,41 +360,6 @@ def test_get_headfile_precision(example_data_path):
assert precision == "double"


_example_data_path = get_example_data_path()


@pytest.mark.parametrize(
"path",
[
_example_data_path / "mf2005_test" / "swiex1.gitzta",
_example_data_path / "mp6" / "EXAMPLE.BUD",
_example_data_path
/ "mfusg_test"
/ "01A_nestedgrid_nognc"
/ "output"
/ "flow.cbc",
],
)
def test_budgetfile_detect_precision_single(path):
file = CellBudgetFile(path, precision="auto")
assert file.realtype == np.float32


@pytest.mark.parametrize(
"path",
[
_example_data_path
/ "mf6"
/ "test006_gwf3"
/ "expected_output"
/ "flow_adj.cbc",
],
)
def test_budgetfile_detect_precision_double(path):
file = CellBudgetFile(path, precision="auto")
assert file.realtype == np.float64


def test_write_head(function_tmpdir):
file_path = function_tmpdir / "headfile"
head_data = np.random.random((10, 10))
Expand Down Expand Up @@ -437,6 +401,12 @@ def test_binaryfile_read(function_tmpdir, freyberg_model_path):
h = HeadFile(freyberg_model_path / "freyberg.githds")
assert isinstance(h, HeadFile)

# check number of records (headers)
assert len(h) == 1
with pytest.deprecated_call():
assert h.get_nrecords() == 1
assert not hasattr(h, "nrecords")

times = h.get_times()
assert np.isclose(times[0], 10.0), f"times[0] != {times[0]}"

Expand Down Expand Up @@ -491,7 +461,7 @@ def test_headfile_reverse_mf6(example_data_path, function_tmpdir):
)
tdis = sim.get_package("tdis")

# load cell budget file, providing tdis as kwarg
# load head file, providing tdis as kwarg
model_path = example_data_path / "mf6" / sim_name
file_stem = "flow_adj"
file_path = model_path / "expected_output" / f"{file_stem}.hds"
Expand All @@ -505,25 +475,21 @@ def test_headfile_reverse_mf6(example_data_path, function_tmpdir):
assert isinstance(rf, HeadFile)

# check that data from both files have the same shape
f_data = f.get_alldata()
f_shape = f_data.shape
rf_data = rf.get_alldata()
rf_shape = rf_data.shape
assert f_shape == rf_shape
assert f.get_alldata().shape == (1, 1, 1, 121)
assert rf.get_alldata().shape == (1, 1, 1, 121)

# check number of records
nrecords = f.get_nrecords()
assert nrecords == rf.get_nrecords()
assert len(f) == 1
assert len(rf) == 1

# check that the data are reversed
nrecords = len(f)
for idx in range(nrecords - 1, -1, -1):
# check headers
f_header = list(f.recordarray[nrecords - idx - 1])
rf_header = list(rf.recordarray[idx])
f_totim = f_header.pop(9) # todo check totim
rf_totim = rf_header.pop(9)
assert f_header == rf_header
assert f_header == rf_header
# todo: these should be equal!
assert f_header != rf_header

# check data
f_data = f.get_data(idx=idx)[0]
Expand Down Expand Up @@ -703,22 +669,3 @@ def test_read_mf2005_freyberg(example_data_path, function_tmpdir, compact):
assert len(cbb_data) == len(cbb_data_kstpkper)
for i in range(len(cbb_data)):
assert np.array_equal(cbb_data[i], cbb_data_kstpkper[i])


def test_read_mf6_budgetfile(example_data_path):
cbb_file = (
example_data_path
/ "mf6"
/ "test005_advgw_tidal"
/ "expected_output"
/ "AdvGW_tidal.cbc"
)
cbb = CellBudgetFile(cbb_file)
rch_zone_1 = cbb.get_data(paknam2="rch-zone_1".upper())
rch_zone_2 = cbb.get_data(paknam2="rch-zone_2".upper())
rch_zone_3 = cbb.get_data(paknam2="rch-zone_3".upper())

# ensure there is a record for each time step
assert len(rch_zone_1) == 120 * 3 + 1
assert len(rch_zone_2) == 120 * 3 + 1
assert len(rch_zone_3) == 120 * 3 + 1
90 changes: 86 additions & 4 deletions autotest/test_cellbudgetfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pytest

from autotest.conftest import get_example_data_path
from flopy.mf6.modflow.mfsimulation import MFSimulation
from flopy.utils.binaryfile import CellBudgetFile

Expand Down Expand Up @@ -289,6 +290,67 @@ def zonbud_model_path(example_data_path):
return example_data_path / "zonbud_examples"


def test_cellbudgetfile_get_indices_nrecords(example_data_path):
pth = example_data_path / "freyberg_multilayer_transient" / "freyberg.cbc"
with CellBudgetFile(pth) as cbc:
pass
assert cbc.get_indices() is None
idxs = cbc.get_indices("constant head")
assert type(idxs) == np.ndarray
assert idxs.dtype == np.int64
np.testing.assert_array_equal(idxs, list(range(0, 5476, 5)) + [5479])
idxs = cbc.get_indices(b" STORAGE")
np.testing.assert_array_equal(idxs, list(range(4, 5475, 5)))

assert len(cbc) == 5483
with pytest.deprecated_call():
assert cbc.nrecords == 5483
with pytest.deprecated_call():
assert cbc.get_nrecords() == 5483


def test_load_cell_budget_file_timeseries(example_data_path):
pth = example_data_path / "mf2005_test" / "swiex1.gitzta"
cbf = CellBudgetFile(pth, precision="single")
ts = cbf.get_ts(text="ZETASRF 1", idx=(0, 0, 24))
assert ts.shape == (4, 2)


_example_data_path = get_example_data_path()


@pytest.mark.parametrize(
"path",
[
_example_data_path / "mf2005_test" / "swiex1.gitzta",
_example_data_path / "mp6" / "EXAMPLE.BUD",
_example_data_path
/ "mfusg_test"
/ "01A_nestedgrid_nognc"
/ "output"
/ "flow.cbc",
],
)
def test_budgetfile_detect_precision_single(path):
file = CellBudgetFile(path, precision="auto")
assert file.realtype == np.float32


@pytest.mark.parametrize(
"path",
[
_example_data_path
/ "mf6"
/ "test006_gwf3"
/ "expected_output"
/ "flow_adj.cbc",
],
)
def test_budgetfile_detect_precision_double(path):
file = CellBudgetFile(path, precision="auto")
assert file.realtype == np.float64


def test_cellbudgetfile_position(function_tmpdir, zonbud_model_path):
fpth = zonbud_model_path / "freyberg.gitcbc"
v = CellBudgetFile(fpth)
Expand All @@ -305,7 +367,7 @@ def test_cellbudgetfile_position(function_tmpdir, zonbud_model_path):
assert ipos == ival, f"position of index 8767 header != {ival}"

cbcd = []
for i in range(idx, v.get_nrecords()):
for i in range(idx, len(v)):
cbcd.append(v.get_data(i)[0])
v.close()

Expand Down Expand Up @@ -334,7 +396,7 @@ def test_cellbudgetfile_position(function_tmpdir, zonbud_model_path):
names = v2.get_unique_record_names(decode=True)

cbcd2 = []
for i in range(0, v2.get_nrecords()):
for i in range(len(v2)):
cbcd2.append(v2.get_data(i)[0])
v2.close()

Expand Down Expand Up @@ -557,10 +619,11 @@ def test_cellbudgetfile_reverse_mf6(example_data_path, function_tmpdir):
assert isinstance(rf, CellBudgetFile)

# check that both files have the same number of records
nrecords = f.get_nrecords()
assert nrecords == rf.get_nrecords()
assert len(f) == 2
assert len(rf) == 2

# check data were reversed
nrecords = len(f)
for idx in range(nrecords - 1, -1, -1):
# check headers
f_header = list(f.recordarray[nrecords - idx - 1])
Expand All @@ -583,3 +646,22 @@ def test_cellbudgetfile_reverse_mf6(example_data_path, function_tmpdir):
else:
# flows should be negated
assert np.array_equal(f_data[0][0], -rf_data[0][0])


def test_read_mf6_budgetfile(example_data_path):
cbb_file = (
example_data_path
/ "mf6"
/ "test005_advgw_tidal"
/ "expected_output"
/ "AdvGW_tidal.cbc"
)
cbb = CellBudgetFile(cbb_file)
rch_zone_1 = cbb.get_data(paknam2="rch-zone_1".upper())
rch_zone_2 = cbb.get_data(paknam2="rch-zone_2".upper())
rch_zone_3 = cbb.get_data(paknam2="rch-zone_3".upper())

# ensure there is a record for each time step
assert len(rch_zone_1) == 120 * 3 + 1
assert len(rch_zone_2) == 120 * 3 + 1
assert len(rch_zone_3) == 120 * 3 + 1
6 changes: 6 additions & 0 deletions autotest/test_formattedfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ def test_formattedfile_read(function_tmpdir, example_data_path):
h = FormattedHeadFile(mf2005_model_path / "test1tr.githds")
assert isinstance(h, FormattedHeadFile)

# check number of records
assert len(h) == 1
with pytest.deprecated_call():
assert h.get_nrecords() == 1
assert not hasattr(h, "nrecords")

times = h.get_times()
assert np.isclose(times[0], 1577880064.0)

Expand Down
Loading

0 comments on commit e2d16df

Please sign in to comment.