feat(datafile): add .headers property with data frame (#2221)

This feature adds a .headers property for a pandas data frame of the headers of various data files, including HeadFile, FormattedHeadFile, UcnFile and CellBudgetFile. This is a modern accessor to the headers, which is created internally using: pd.DataFrame(self.recordarray, index=self.iposarray) where the index is the file position to the start of each array. Text fields are decoded to str types (rather than dealing with bytes types, which is probably a hang-over from Python2). Int32 types are left as-is, except for iposarray which should be int64 to read large files >2GB. Float32 types are also left as-is, since these would otherwise have a lossy conversion to float64. With the CellBudgetFile, the headers vary depending on the type of budget file. For instance, "classic" files (created without "COMPACT BUDGET" option) don't have imeth, delt, pertim, or totim columns. Furthermore, only files with imeth=6 have the extra text columns modelnam, paknam, modelnam2, and paknam2, since these fields are always empty. This PR also adds more checks to the outputs created with ._build_index() methods. Eventually, I'm planning to deprecate other properties and functions that are using the .recordarray structured array, so it is important to have a detailed trace of these outputs before replacing them. This PR also moves the __enter__ / __exit__ methods from BinaryLayerFile to LayerFile (this feature was from #669). This is so FormattedHeadFile can also use the "with" context statement to also auto-close the file.
modflowpy · Jun 11, 2024 · d36bb78 · d36bb78
1 parent 17bb2e9
commit d36bb78
Show file tree

Hide file tree

Showing 6 changed files with 486 additions and 8 deletions.
diff --git a/autotest/test_binaryfile.py b/autotest/test_binaryfile.py
@@ -1,6 +1,7 @@
 from itertools import repeat
 
 import numpy as np
+import pandas as pd
 import pytest
 from matplotlib import pyplot as plt
 from matplotlib.axes import Axes
@@ -14,6 +15,7 @@
     CellBudgetFile,
     HeadFile,
     HeadUFile,
+    UcnFile,
     Util2d,
 )
 from flopy.utils.binaryfile import (
@@ -71,6 +73,116 @@ def test_deprecated_binaryread_struct(example_data_path):
         assert res == 20
 
 
+def test_headfile_build_index(example_data_path):
+    # test low-level BinaryLayerFile._build_index() method
+    pth = example_data_path / "freyberg_multilayer_transient" / "freyberg.hds"
+    with HeadFile(pth) as hds:
+        pass
+    assert hds.nrow == 40
+    assert hds.ncol == 20
+    assert hds.nlay == 3
+    assert not hasattr(hds, "nper")
+    assert hds.totalbytes == 10_676_004
+    assert len(hds.recordarray) == 3291
+    assert type(hds.recordarray) == np.ndarray
+    assert hds.recordarray.dtype == np.dtype(
+        [
+            ("kstp", "i4"),
+            ("kper", "i4"),
+            ("pertim", "f4"),
+            ("totim", "f4"),
+            ("text", "S16"),
+            ("ncol", "i4"),
+            ("nrow", "i4"),
+            ("ilay", "i4"),
+        ]
+    )
+    # check first and last recorddict
+    list_recordarray = hds.recordarray.tolist()
+    assert list_recordarray[0] == (
+        (1, 1, 1.0, 1.0, b"            HEAD", 20, 40, 1)
+    )
+    assert list_recordarray[-1] == (
+        (1, 1097, 1.0, 1097.0, b"            HEAD", 20, 40, 3)
+    )
+    assert hds.times == list((np.arange(1097) + 1).astype(np.float32))
+    assert hds.kstpkper == [(1, kper + 1) for kper in range(1097)]
+    np.testing.assert_array_equal(hds.iposarray, np.arange(3291) * 3244 + 44)
+    assert hds.iposarray.dtype == np.int64
+    # check first and last row of data frame
+    pd.testing.assert_frame_equal(
+        hds.headers.iloc[[0, -1]],
+        pd.DataFrame(
+            {
+                "kstp": np.array([1, 1], np.int32),
+                "kper": np.array([1, 1097], np.int32),
+                "pertim": np.array([1.0, 1.0], np.float32),
+                "totim": np.array([1.0, 1097.0], np.float32),
+                "text": ["HEAD", "HEAD"],
+                "ncol": np.array([20, 20], np.int32),
+                "nrow": np.array([40, 40], np.int32),
+                "ilay": np.array([1, 3], np.int32),
+            },
+            index=[44, 10672804],
+        ),
+    )
+
+
+def test_concentration_build_index(example_data_path):
+    # test low-level BinaryLayerFile._build_index() method with UCN file
+    pth = example_data_path / "mt3d_test/mf2005mt3d/P07/MT3D001.UCN"
+    with UcnFile(pth) as ucn:
+        pass
+    assert ucn.nrow == 15
+    assert ucn.ncol == 21
+    assert ucn.nlay == 8
+    assert not hasattr(ucn, "nper")
+    assert ucn.totalbytes == 10_432
+    assert len(ucn.recordarray) == 8
+    assert type(ucn.recordarray) == np.ndarray
+    assert ucn.recordarray.dtype == np.dtype(
+        [
+            ("ntrans", "i4"),
+            ("kstp", "i4"),
+            ("kper", "i4"),
+            ("totim", "f4"),
+            ("text", "S16"),
+            ("ncol", "i4"),
+            ("nrow", "i4"),
+            ("ilay", "i4"),
+        ]
+    )
+    # check first and last recorddict
+    list_recordarray = ucn.recordarray.tolist()
+    assert list_recordarray[0] == (
+        (29, 1, 1, 100.0, b"CONCENTRATION   ", 21, 15, 1)
+    )
+    assert list_recordarray[-1] == (
+        (29, 1, 1, 100.0, b"CONCENTRATION   ", 21, 15, 8)
+    )
+    assert ucn.times == [np.float32(100.0)]
+    assert ucn.kstpkper == [(1, 1)]
+    np.testing.assert_array_equal(ucn.iposarray, np.arange(8) * 1304 + 44)
+    assert ucn.iposarray.dtype == np.int64
+    # check first and last row of data frame
+    pd.testing.assert_frame_equal(
+        ucn.headers.iloc[[0, -1]],
+        pd.DataFrame(
+            {
+                "ntrans": np.array([29, 29], np.int32),
+                "kstp": np.array([1, 1], np.int32),
+                "kper": np.array([1, 1], np.int32),
+                "totim": np.array([100.0, 100.0], np.float32),
+                "text": ["CONCENTRATION", "CONCENTRATION"],
+                "ncol": np.array([21, 21], np.int32),
+                "nrow": np.array([15, 15], np.int32),
+                "ilay": np.array([1, 8], np.int32),
+            },
+            index=[44, 9172],
+        ),
+    )
+
+
 def test_binaryfile_writeread(function_tmpdir, nwt_model_path):
     model = "Pr3_MFNWT_lower.nam"
     ml = flopy.modflow.Modflow.load(