700 refactor hi l1a histogram (IMAP-Science-Operations-Center#1196)

* Implement numpy 12-bit unpacking of each counter * Refactor allocate_histogram_dataset to copy and update the L0 packets dataset rather than starting from scratch * add check on output dtype * make nitpick_ignore_regex cover all numpy integer and unsigned integer types * Try again to fix doc build * darn typo
lacoak21 · Dec 6, 2024 · 46c8d3e · 46c8d3e
1 parent 7aa5860
commit 46c8d3e
Show file tree

Hide file tree

Showing 4 changed files with 172 additions and 105 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -122,6 +122,7 @@
     (r"py:.*", r".*.spice.geometry.SpiceBody.*"),
     (r"py:.*", r".*.spice.geometry.SpiceFrame.*"),
     (r"py:class", r"numpy._typing.*"),
+    (r"py:class", r"^numpy\.(u?int(?:8|16|32|64))$"),
 ]
 
 # Ignore the inherited members from the <instrument>APID IntEnum class

diff --git a/imap_processing/cdf/config/imap_hi_variable_attrs.yaml b/imap_processing/cdf/config/imap_hi_variable_attrs.yaml
@@ -55,6 +55,54 @@ default_float32_attrs: &default_float32
   VALIDMAX: 3.4028235e+38
   dtype: float32
 
+default_ccsds_version: &default_ccsds_version
+  <<: *default_uint8
+  CATDESC: CCSDS packet version number
+  FIELDNAM: CCSDS version
+  VAR_TYPE: support_data
+
+default_ccsds_type: &default_ccsds_type
+  <<: *default_uint8
+  CATDESC: CCSDS packet type
+  FIELDNAM: CCSDS type
+  VAR_TYPE: support_data
+
+default_ccsds_sec_hdr_flg: &default_ccsds_sec_hdr_flg
+  <<: *default_uint8
+  CATDESC: CCSDS secondary header flag
+  FIELDNAM: CCSDS secondary header flag
+  VAR_TYPE: support_data
+
+default_ccsds_pkt_apid: &default_ccsds_pkt_apid
+  <<: *default_uint16
+  CATDESC: CCSDS application process ID
+  FIELDNAM: CCSDS APID
+  VAR_TYPE: support_data
+
+default_ccsds_seq_flgs: &default_ccsds_seq_flgs
+  <<: *default_uint8
+  CATDESC: CCSDS sequence flags
+  FIELDNAM: CCSDS sequence flags
+  VAR_TYPE: support_data
+
+default_ccsds_src_seq_ctr: &default_ccsds_src_seq_ctr
+  <<: *default_uint16
+  CATDESC: CCSDS source sequence counter
+  FIELDNAM: CCSDS sequence counter
+  VAR_TYPE: support_data
+
+default_ccsds_pkt_len: &default_ccsds_pkt_len
+  <<: *default_uint16
+  CATDESC: CCSDS packet length
+  FIELDNAM: CCSDS packet length
+  VAR_TYPE: support_data
+
+default_ccsds_cksum: &default_ccsds_cksum
+  <<: *default_uint16
+  CATDESC: CCSDS packet checksum
+  FIELDNAM: CCSDS packet checksum
+  VAR_TYPE: support_data
+
 default_ccsds_met: &hi_ccsds_met
   <<: *default_uint32
   CATDESC: CCSDS mission elapsed time (MET). 32-bit integer value that represents the MET in seconds.
@@ -178,6 +226,27 @@ hi_hist_angle:
   VAR_TYPE: support_data
   dtype: uint16
 
+hi_hist_version:
+  <<: *default_ccsds_version
+
+hi_hist_type:
+  <<: *default_ccsds_type
+
+hi_hist_sec_hdr_flg:
+  <<: *default_ccsds_sec_hdr_flg
+
+hi_hist_pkt_apid:
+  <<: *default_ccsds_pkt_apid
+
+hi_hist_seq_flgs:
+  <<: *default_ccsds_seq_flgs
+
+hi_hist_src_seq_ctr:
+  <<: *default_ccsds_src_seq_ctr
+
+hi_hist_pkt_len:
+  <<: *default_ccsds_pkt_len
+
 hi_hist_ccsds_met:
   <<: *hi_ccsds_met
 
@@ -199,6 +268,9 @@ hi_hist_counters:
   LABL_PTR_1: angle_label
   FORMAT: I4
 
+hi_hist_cksum:
+  <<: *default_ccsds_cksum
+
 # ======= L1B DE Section =======
 hi_de_coincidence_type:
   <<: *default_uint8

diff --git a/imap_processing/hi/l1a/histogram.py b/imap_processing/hi/l1a/histogram.py
@@ -2,9 +2,9 @@
 
 import numpy as np
 import xarray as xr
+from numpy._typing import NDArray
 
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
-from imap_processing.utils import convert_to_binary_string
 
 # define the names of the 24 counter arrays
 # contained in the histogram packet
@@ -41,115 +41,113 @@ def create_dataset(input_ds: xr.Dataset) -> xr.Dataset:
     Parameters
     ----------
     input_ds : xarray.Dataset
-        Dataset of packets.
+        Dataset of packets generated using the
+        `imap_processing.utils.packet_file_to_datasets` function.
 
     Returns
     -------
     dataset : xarray.Dataset
         Dataset with all metadata field data in xr.DataArray.
     """
-    dataset = allocate_histogram_dataset(len(input_ds.epoch))
-
-    # TODO: Move into the allocate dataset function. Ticket: #700
-    dataset["epoch"].data[:] = input_ds["epoch"].data
-    dataset["ccsds_met"].data = input_ds["shcoarse"].data
-    dataset["esa_stepping_num"].data = input_ds["esa_step"].data
-    dataset["num_of_spins"].data = input_ds["num_of_spins"].data
-
-    # unpack the counter binary blobs into the Dataset
-    # TODO: Look into avoiding the for-loops below
-    #       It seems like we could try to reshape the arrays and do some numpy
-    #       broadcasting rather than for-loops directly here. Ticket: #700
-    for i_epoch in range(input_ds["epoch"].size):
-        for counter in (*QUALIFIED_COUNTERS, *LONG_COUNTERS, *TOTAL_COUNTERS):
-            binary_str_val = convert_to_binary_string(input_ds[counter].data[i_epoch])
-            # unpack array of 90 12-bit unsigned integers
-            counter_ints = [
-                int(binary_str_val[i * 12 : (i + 1) * 12], 2) for i in range(90)
-            ]
-            dataset[counter][i_epoch] = counter_ints
-
-    return dataset
-
-
-def allocate_histogram_dataset(num_packets: int) -> xr.Dataset:
-    """
-    Allocate empty xarray.Dataset for specified number of Hi Histogram packets.
-
-    Parameters
-    ----------
-    num_packets : int
-        The number of Hi Histogram packets to allocate space for
-        in the xarray.Dataset.
-
-    Returns
-    -------
-    dataset : xarray.Dataset
-        Empty xarray.Dataset ready to be filled with packet data.
-    """
     attr_mgr = ImapCdfAttributes()
     attr_mgr.add_instrument_global_attrs(instrument="hi")
     attr_mgr.add_instrument_variable_attrs(instrument="hi", level=None)
-    # preallocate the xr.DataArrays for all CDF attributes based on number of packets
-    coords = dict()
-    coords["epoch"] = xr.DataArray(
-        np.empty(num_packets, dtype="datetime64[ns]"),
-        name="epoch",
-        dims=["epoch"],
-        attrs=attr_mgr.get_variable_attributes("epoch"),
-    )
-    # Histogram data is binned in 90, 4-degree bins
-    coords["angle"] = xr.DataArray(
-        np.arange(2, 360, 4),
-        name="angle",
-        dims=["angle"],
-        attrs=attr_mgr.get_variable_attributes("hi_hist_angle"),
-    )
 
-    data_vars = dict()
-    # Generate label variables
-    data_vars["angle_label"] = xr.DataArray(
-        coords["angle"].values.astype(str),
-        name="angle_label",
-        dims=["angle"],
-        attrs=attr_mgr.get_variable_attributes(
-            "hi_hist_angle_label", check_schema=False
-        ),
-    )
-    # Other data variables
-    data_vars["ccsds_met"] = xr.DataArray(
-        np.empty(num_packets, dtype=np.uint32),
-        dims=["epoch"],
-        attrs=attr_mgr.get_variable_attributes("hi_hist_ccsds_met"),
-    )
-    data_vars["esa_stepping_num"] = xr.DataArray(
-        np.empty(num_packets, dtype=np.uint8),
-        dims=["epoch"],
-        attrs=attr_mgr.get_variable_attributes("hi_hist_esa_step"),
+    # Rename shcoarse variable (do this first since it copies the input_ds)
+    dataset = input_ds.rename_vars({"shcoarse": "ccsds_met"})
+
+    dataset.epoch.attrs.update(
+        attr_mgr.get_variable_attributes("epoch"),
     )
-    data_vars["num_of_spins"] = xr.DataArray(
-        np.empty(num_packets, dtype=np.uint8),
-        dims=["epoch"],
-        attrs=attr_mgr.get_variable_attributes("hi_hist_esa_step"),
+    # Add the hist_angle coordinate
+    # Histogram data is binned in 90, 4-degree bins
+    attrs = attr_mgr.get_variable_attributes("hi_hist_angle")
+    dataset.coords.update(
+        {
+            "angle": xr.DataArray(
+                np.arange(2, 360, 4),
+                name="angle",
+                dims=["angle"],
+                attrs=attrs,
+            )
+        }
     )
-
-    # Allocate xarray.DataArray objects for the 24 90-element histogram counters
+    # Update existing variable attributes
+    for var_name in [
+        "version",
+        "type",
+        "sec_hdr_flg",
+        "pkt_apid",
+        "seq_flgs",
+        "src_seq_ctr",
+        "pkt_len",
+        "ccsds_met",
+        "esa_step",
+        "num_of_spins",
+        "cksum",
+    ]:
+        attrs = attr_mgr.get_variable_attributes(f"hi_hist_{var_name}")
+        dataset.data_vars[var_name].attrs.update(attrs)
+
+    new_vars = dict()
+    # Populate 90-element histogram counters
     default_counter_attrs = attr_mgr.get_variable_attributes("hi_hist_counters")
     for counter_name in (*QUALIFIED_COUNTERS, *LONG_COUNTERS, *TOTAL_COUNTERS):
         # Inject counter name into generic counter attributes
         counter_attrs = default_counter_attrs.copy()
         for key, val in counter_attrs.items():
             if isinstance(val, str) and "{counter_name}" in val:
                 counter_attrs[key] = val.format(counter_name=counter_name)
-        data_vars[counter_name] = xr.DataArray(
-            data=np.empty((num_packets, len(coords["angle"])), np.uint16),
+        # Instantiate the counter DataArray
+        new_vars[counter_name] = xr.DataArray(
+            data=unpack_hist_counter(input_ds[counter_name].data.sum()),
             dims=["epoch", "angle"],
             attrs=counter_attrs,
         )
 
-    dataset = xr.Dataset(
-        data_vars=data_vars,
-        coords=coords,
-        attrs=attr_mgr.get_global_attributes("imap_hi_l1a_hist_attrs"),
+    # Generate label variable for angle coordinate
+    new_vars["angle_label"] = xr.DataArray(
+        dataset.coords["angle"].values.astype(str),
+        name="angle_label",
+        dims=["angle"],
+        attrs=attr_mgr.get_variable_attributes(
+            "hi_hist_angle_label", check_schema=False
+        ),
     )
+
+    dataset.update(new_vars)
+    dataset.attrs.update(attr_mgr.get_global_attributes("imap_hi_l1a_hist_attrs"))
+
     return dataset
+
+
+def unpack_hist_counter(counter_bytes: bytes) -> NDArray[np.uint16]:
+    """
+    Unpack Hi SCI_CNT counter data for a single counter.
+
+    Parameters
+    ----------
+    counter_bytes : bytes
+        Sum individual bytes for all epochs of a Hi SCI_CNT counter.
+
+    Returns
+    -------
+    output_array : numpy.ndarray[numpy.uint16]
+        The unpacked 12-bit unsigned integers for the input bytes. The
+        output array has a shape of (n, 90) where n is the number of SCI_CNT
+        packets in the input dataset.
+    """
+    # Interpret bytes for all epochs of current counter as uint8 array
+    counter_uint8 = np.frombuffer(counter_bytes, dtype=np.uint8)
+    # Split into triplets of upper-byte, split-byte and lower-byte arrays
+    upper_uint8, split_unit8, lower_uint8 = np.reshape(
+        counter_uint8, (3, -1), order="F"
+    ).astype(np.uint16)
+    # Compute even indexed uint12 values from upper-byte and first 4-bits of
+    # split-byte
+    even_uint12 = (upper_uint8 << 4) + (split_unit8 >> 4)
+    # Compute odd indexed uint12 values from lower 4-bits of split-byte and
+    # lower-byte
+    odd_uint12 = ((split_unit8 & (2**4 - 1)) << 8) + lower_uint8
+    output_array = np.column_stack((even_uint12, odd_uint12)).reshape(-1, 90)
+    return output_array
diff --git a/imap_processing/tests/hi/test_l1a.py b/imap_processing/tests/hi/test_l1a.py
@@ -1,8 +1,8 @@
 import numpy as np
 
 from imap_processing.cdf.utils import write_cdf
-from imap_processing.hi.l1a import histogram as hist
 from imap_processing.hi.l1a.hi_l1a import hi_l1a
+from imap_processing.hi.l1a.histogram import unpack_hist_counter
 from imap_processing.hi.utils import HIAPID
 
 
@@ -59,19 +59,15 @@ def test_app_hist_decom(hi_l0_test_data_path):
     assert cem_raw_cdf_filepath.name.startswith("imap_hi_l1a_90sensor-hist_")
 
 
-def test_allocate_histogram_dataset():
-    """Test hi.l1a.histogram.allocate_histogram_dataset()"""
-    n_packets = 5
-    dataset = hist.allocate_histogram_dataset(n_packets)
-
-    assert dataset.attrs["Data_type"] == "L1A_HIST>Level-1A Histogram"
-    assert dataset.sizes["epoch"] == n_packets
-    assert dataset.sizes["angle"] == 90
-    for var_name in (
-        "ccsds_met",
-        "esa_stepping_num",
-        *hist.QUALIFIED_COUNTERS,
-        *hist.LONG_COUNTERS,
-        *hist.TOTAL_COUNTERS,
-    ):
-        assert var_name in dataset
+def test_unpack_hist_counter():
+    """Test hi.l1a.histogram.unpack_hist_counter()"""
+    # To ensure correct unpacking, use expected values with ones in the upper
+    # and lower parts of the 12-bit numbers
+    expected = (np.arange(180).reshape((2, 90)) + 2**10).astype(">u2")
+    # convert each expected uint16 to a 12-bit bitstring and join
+    bin_str = "".join([f"{val:012b}" for val in expected.ravel()])
+    # convert the bitstring to a bytes object
+    bytes_array = int(bin_str, 2).to_bytes(len(bin_str) // 8, byteorder="big")
+    output_array = unpack_hist_counter(bytes_array)
+    np.testing.assert_array_equal(output_array, expected)
+    assert output_array.dtype == np.uint16