Skip to content

Commit

Permalink
MNT: Change IMAP-Hi over to use new dataset accessor
Browse files Browse the repository at this point in the history
This updates the Hi utilities to use the dataset creator. There
is still some work that can be done to improve some of the routines
with NumPy broadcasting, but this at least shows the basics.
  • Loading branch information
greglucas committed Jul 10, 2024
1 parent 8e7e0c4 commit 3a56b68
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 73 deletions.
26 changes: 15 additions & 11 deletions imap_processing/hi/l1a/hi_l1a.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
"""IMAP-HI L1A processing module."""

import logging
from pathlib import Path
from typing import Union

from imap_processing.hi.l0 import decom_hi
from imap_processing import imap_module_directory
from imap_processing.hi.l1a.histogram import create_dataset as hist_create_dataset
from imap_processing.hi.l1a.housekeeping import process_housekeeping
from imap_processing.hi.l1a.science_direct_event import science_direct_event
from imap_processing.hi.utils import HIAPID
from imap_processing.utils import group_by_apid
from imap_processing.utils import packet_file_to_datasets

logger = logging.getLogger(__name__)


def hi_l1a(packet_file_path: str, data_version: str) -> list:
def hi_l1a(packet_file_path: Union[str, Path], data_version: str) -> list:
"""
Will process IMAP raw data to l1a.
Expand All @@ -28,30 +30,32 @@ def hi_l1a(packet_file_path: str, data_version: str) -> list:
processed_data : list
List of processed xarray dataset.
"""
unpacked_data = decom_hi.decom_packets(packet_file_path)

# group data by apid
grouped_data = group_by_apid(unpacked_data)
packet_def_file = (
imap_module_directory / "hi/packet_definitions/hi_packet_definition.xml"
)
datasets_by_apid = packet_file_to_datasets(
packet_file=packet_file_path, xtce_packet_definition=packet_def_file
)

# Process science to l1a.
processed_data = []
for apid in grouped_data.keys():
for apid in datasets_by_apid:
if apid == HIAPID.H45_SCI_CNT:
logger.info(
"Processing histogram data for [%s] packets", HIAPID.H45_SCI_CNT.name
)
data = hist_create_dataset(grouped_data[apid])
data = hist_create_dataset(datasets_by_apid[apid])
elif apid == HIAPID.H45_SCI_DE:
logger.info(
"Processing direct event data for [%s] packets", HIAPID.H45_SCI_DE.name
)

data = science_direct_event(grouped_data[apid])
data = science_direct_event(datasets_by_apid[apid])
elif apid == HIAPID.H45_APP_NHK:
logger.info(
"Processing housekeeping data for [%s] packets", HIAPID.H45_APP_NHK.name
)
data = process_housekeeping(grouped_data[apid])
data = process_housekeeping(datasets_by_apid[apid])
else:
raise RuntimeError(f"Encountered unexpected APID [{apid}]")

Expand Down
26 changes: 14 additions & 12 deletions imap_processing/hi/l1a/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

import numpy as np
import xarray as xr
from space_packet_parser.parser import Packet

from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
from imap_processing.cdf.utils import met_to_j2000ns

# define the names of the 24 counter arrays
# contained in the histogram packet
Expand Down Expand Up @@ -35,30 +33,34 @@
TOTAL_COUNTERS = ("total_a", "total_b", "total_c", "fee_de_sent", "fee_de_recd")


def create_dataset(packets: list[Packet]) -> xr.Dataset:
def create_dataset(input_ds: xr.Dataset) -> xr.Dataset:
"""
Create dataset for a number of Hi Histogram packets.
Parameters
----------
packets : list[space_packet_parser.ParsedPacket]
Packet list.
input_ds : xarray.Dataset
Dataset of packets.
Returns
-------
dataset : xarray.Dataset
Dataset with all metadata field data in xr.DataArray.
"""
dataset = allocate_histogram_dataset(len(packets))
dataset = allocate_histogram_dataset(len(input_ds.epoch))

# unpack the packets data into the Dataset
for i_epoch, packet in enumerate(packets):
dataset.epoch.data[i_epoch] = met_to_j2000ns(packet.data["CCSDS_MET"].raw_value)
dataset.ccsds_met[i_epoch] = packet.data["CCSDS_MET"].raw_value
dataset.esa_stepping_num[i_epoch] = packet.data["ESA_STEP"].raw_value
# TODO: Move into the allocate dataset function?
dataset["epoch"].data[:] = input_ds["epoch"].data
dataset["ccsds_met"].data = input_ds["ccsds_met"].data
dataset["esa_stepping_num"].data = input_ds["esa_step"].data

# unpack the packets data into the Dataset
# (npackets, 24 * 90 * 12)
# TODO: Look into avoiding the for-loops below
# It seems like we could try to reshape the arrays and do some numpy
# broadcasting rather than for-loops directly here
for i_epoch, counters_binary_data in enumerate(input_ds["counters"].data):
# unpack 24 arrays of 90 12-bit unsigned integers
counters_binary_data = packet.data["COUNTERS"].raw_value
counter_ints = [
int(counters_binary_data[i * 12 : (i + 1) * 12], 2) for i in range(90 * 24)
]
Expand Down
14 changes: 3 additions & 11 deletions imap_processing/hi/l1a/housekeeping.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
"""Unpack IMAP-Hi housekeeping data."""

import xarray as xr
from space_packet_parser.parser import Packet

from imap_processing.hi.hi_cdf_attrs import (
hi_hk_l1a_attrs,
)
from imap_processing.utils import create_dataset, update_epoch_to_datetime


def process_housekeeping(packets: list[Packet]) -> xr.Dataset:
def process_housekeeping(dataset: xr.Dataset) -> xr.Dataset:
"""
Create dataset for each metadata field.
Parameters
----------
packets : list[space_packet_parser.ParsedPacket]
Packet list.
dataset : xarray.Dataset
Packet input dataset.
Returns
-------
dataset : xarray.Dataset
Dataset with all metadata field data in xr.DataArray.
"""
dataset = create_dataset(
packets=packets, spacecraft_time_key="ccsds_met", skip_keys=["INSTR_SPECIFIC"]
)
# Update epoch to datetime
dataset = update_epoch_to_datetime(dataset)

# Add datalevel attrs
dataset.attrs.update(hi_hk_l1a_attrs.output())
return dataset
13 changes: 6 additions & 7 deletions imap_processing/hi/l1a/science_direct_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import numpy as np
import xarray as xr
from space_packet_parser.parser import Packet

from imap_processing import imap_module_directory
from imap_processing.cdf.cdf_attribute_manager import CdfAttributeManager
Expand Down Expand Up @@ -297,7 +296,7 @@ def create_dataset(de_data_list: list, packet_met_time: list) -> xr.Dataset:
return dataset


def science_direct_event(packets_data: list[Packet]) -> xr.Dataset:
def science_direct_event(packets_data: xr.Dataset) -> xr.Dataset:
"""
Unpack IMAP-Hi direct event data.
Expand All @@ -309,8 +308,8 @@ def science_direct_event(packets_data: list[Packet]) -> xr.Dataset:
Parameters
----------
packets_data : list[space_packet_parser.ParsedPacket]
List of packets data.
packets_data : xarray.Dataset
Packets extracted into a dataset.
Returns
-------
Expand All @@ -324,14 +323,14 @@ def science_direct_event(packets_data: list[Packet]) -> xr.Dataset:
# I am using extend to add another list to the
# end of the list. This way, I don't need to flatten
# the list later.
for data in packets_data:
for i, data in enumerate(packets_data["de_tof"].data):
# break binary stream data into unit of 48-bits
event_48bits_list = break_into_bits_size(data.data["DE_TOF"].raw_value)
event_48bits_list = break_into_bits_size(data)
# parse 48-bits into meaningful data such as metaevent or direct event
de_data_list.extend([parse_direct_event(event) for event in event_48bits_list])
# add packet time to packet_met_time
packet_met_time.extend(
[data.data["CCSDS_MET"].raw_value] * len(event_48bits_list)
[packets_data["ccsds_met"].data[i]] * len(event_48bits_list)
)

# create dataset
Expand Down
2 changes: 1 addition & 1 deletion imap_processing/tests/hi/test_l1a.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_app_nhk_decom():
# TODO: compare with validation data once we have it

# Write CDF
cem_raw_cdf_filepath = write_cdf(processed_data[0])
cem_raw_cdf_filepath = write_cdf(processed_data[0], istp=False)

# TODO: ask Vivek about this date mismatch between the file name
# and the data. May get resolved when we have good sample data.
Expand Down
33 changes: 2 additions & 31 deletions imap_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,7 @@ def create_dataset(

# NOTE: At this point, we keep epoch time as raw value from packet
# which is in seconds and spacecraft time. Some instrument uses this
# raw value in processing. If you want to convert this to datetime
# object, you can use `update_epoch_to_datetime` function afterwards.
# raw value in processing.
epoch_time = xr.DataArray(
metadata_arrays[spacecraft_time_key],
name="epoch",
Expand Down Expand Up @@ -229,37 +228,9 @@ def create_dataset(
return dataset


def update_epoch_to_datetime(dataset: xr.Dataset) -> xr.Dataset:
"""
Update epoch in dataset to datetime object.
Parameters
----------
dataset : xr.Dataset
Dataset to update.
Returns
-------
dataset : xr.Dataset
Dataset with updated epoch dimension from int to datetime object.
"""
# convert epoch to datetime
epoch_converted_time = met_to_j2000ns(dataset["epoch"])
# add attrs back to epoch
epoch = xr.DataArray(
epoch_converted_time,
name="epoch",
dims=["epoch"],
attrs=ConstantCoordinates.EPOCH,
)
dataset = dataset.assign_coords(epoch=epoch)

return dataset


def packet_file_to_datasets(
packet_file: Union[str, Path],
xtce_packet_definition: str,
xtce_packet_definition: Union[str, Path],
use_derived_value: bool = True,
) -> dict[int, xr.Dataset]:
"""
Expand Down

0 comments on commit 3a56b68

Please sign in to comment.