ENH/MNT: Allow array-like time conversion (#653)
* ENH/MNT: Allow array-like time conversion

We can convert entire arrays of integer MET values at once, rather than
looping in each individual calling location.

This is faster and easier to read.

* ENH/MNT: Change from datetime formats to integer epochs

We have decided to define epoch as integer nanoseconds since the J2000
epoch. This means that the epoch variable will be of datatype int64, and
the new conversion utility will convert MET in seconds to J2000
nanoseconds for users, with a default reference epoch of 2010-01-01 per
APL's documentation.
greglucas authored Jun 27, 2024
1 parent 52aaf70 commit 757eccf
Showing 28 changed files with 168 additions and 183 deletions.
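
Before the per-file diff, a minimal sketch of the new conversion for orientation. The constants and arithmetic mirror the new `met_to_j2000ns` in `imap_processing/cdf/utils.py` below; the sample MET values are made up:

```python
import numpy as np

# Constants as defined in the new imap_processing/cdf/utils.py (see diff below)
IMAP_EPOCH = np.datetime64("2010-01-01T00:00:00", "ns")
J2000_EPOCH = np.datetime64("2000-01-01T11:58:55.816", "ns")

# MET seconds from the spacecraft clock; whole arrays now convert in one call
met = np.array([0, 1, 100.5])
j2000_offset = (IMAP_EPOCH - J2000_EPOCH).astype("timedelta64[ns]").astype(np.int64)
epoch = (met * 1e9).astype(np.int64) + j2000_offset  # int64 nanoseconds since J2000
```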
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-2019, ubuntu-latest, macos-latest]
+        os: [windows-latest, ubuntu-latest, macos-latest]
         python-version: ['3.9', '3.10', '3.11', '3.12']
     defaults:
       run:
5 changes: 0 additions & 5 deletions imap_processing/__init__.py
@@ -14,8 +14,6 @@
 # This directory is used by the imap_processing package to find the packet definitions.
 from pathlib import Path

-import numpy as np
-
 from imap_processing._version import __version__, __version_tuple__  # noqa: F401

 # Eg. imap_module_directory = /usr/local/lib/python3.11/site-packages/imap_processing
@@ -34,6 +32,3 @@
     "swe": ["l0", "l1a", "l1b", "l2"],
     "ultra": ["l0", "l1a", "l1b", "l1c", "l2"],
 }
-
-# Reference start time (launch time or epoch)
-launch_time = np.datetime64("2010-01-01T00:01:06.184", "ns")
2 changes: 1 addition & 1 deletion imap_processing/cdf/config/imap_hi_variable_attrs.yaml
@@ -70,7 +70,7 @@ default_epoch: &default_epoch
   TIME_BASE: J2000
   TIME_SCALE: Terrestrial Time
   REFERENCE_POSITION: Rotating Earth Geoid
-  dtype: datetime64[ns]
+  dtype: int64

 # ------- L1A DE Section -------
 hi_de_ccsds_met:
70 changes: 37 additions & 33 deletions imap_processing/cdf/utils.py
@@ -16,39 +16,49 @@
 logger = logging.getLogger(__name__)


-def calc_start_time(
-    shcoarse_time: float,
-    launch_time: Optional[np.datetime64] = imap_processing.launch_time,
-) -> np.datetime64:
-    """
-    Calculate the datetime64 from the CCSDS secondary header information.
-
-    Since all instrument has SHCOARSE or MET seconds, we need convert it to
-    UTC. Took this from IDEX code.
+# Reference start time (launch time or epoch)
+# DEFAULT_EPOCH = np.datetime64("2010-01-01T00:01:06.184", "ns")
+IMAP_EPOCH = np.datetime64("2010-01-01T00:00:00", "ns")
+J2000_EPOCH = np.datetime64("2000-01-01T11:58:55.816", "ns")
+
+
+def met_to_j2000ns(
+    met: np.typing.ArrayLike,
+    reference_epoch: Optional[np.datetime64] = IMAP_EPOCH,
+) -> np.typing.ArrayLike:
+    """
+    Convert mission elapsed time (MET) to nanoseconds from J2000.

     Parameters
     ----------
-    shcoarse_time : float
-        Number of seconds since epoch (nominally the launch time).
-    launch_time : np.datetime64
-        The time of launch to use as the baseline.
+    met : array_like
+        Number of seconds since epoch according to the spacecraft clock.
+    reference_epoch : np.datetime64
+        The time of reference for the mission elapsed time. The standard
+        reference time for IMAP is January 1, 2010 00:00:00 UTC. Per APL's
+        IMAP Timekeeping System Design document.

     Returns
     -------
-    np.timedelta64
-        The time of the event.
+    array_like or scalar, int64
+        The mission elapsed time converted to nanoseconds since the J2000 epoch.

     Notes
     -----
-    TODO - move this into imap-data-access? How should it be used?
-    This conversion is temporary for now, and will need SPICE in the future.
-    Nick Dutton mentioned that s/c clock start epoch is
-    jan-1-2010-00:01:06.184 ET
-    We will use this for now.
+    This conversion is temporary for now, and will need SPICE in the future to
+    account for spacecraft clock drift.
     """
-    # Get the datetime of Jan 1 2010 as the start date
-    time_delta = np.timedelta64(int(shcoarse_time * 1e9), "ns")
-    return launch_time + time_delta
+    # Mission elapsed time is in seconds, convert to nanoseconds
+    # NOTE: We need to multiply the incoming met by 1e9 first because we could have
+    # float input and we want to keep ns precision in those floats
+    # NOTE: We need int64 here when running on 32bit systems as plain int will default
+    # to 32bit and overflow due to the nanosecond multiplication
+    time_array = (np.asarray(met, dtype=float) * 1e9).astype(np.int64)
+    # Calculate the time difference between our reference system and J2000
+    j2000_offset = (
+        (reference_epoch - J2000_EPOCH).astype("timedelta64[ns]").astype(np.int64)
+    )
+    return j2000_offset + time_array
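
A usage sketch (not part of the diff) of the function above; array input is the point of the change, and the non-default reference epoch mirrors the old `launch_time`:

```python
import numpy as np

from imap_processing.cdf.utils import met_to_j2000ns

# Scalar MET in seconds -> int64 nanoseconds since J2000
met_to_j2000ns(0)  # == (IMAP_EPOCH - J2000_EPOCH) in ns

# Array input is converted in a single vectorized call
met_to_j2000ns(np.array([0, 1, 100.5]))

# A different reference epoch (e.g. the old launch_time) shifts the offset
met_to_j2000ns(0, reference_epoch=np.datetime64("2010-01-01T00:01:06.184", "ns"))
```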


 def load_cdf(
@@ -64,19 +74,14 @@ def load_cdf(
     remove_xarray_attrs : bool
         Whether to remove the xarray attributes that get injected by the
         cdf_to_xarray function from the output xarray.Dataset. Default is True.
-    **kwargs : {dict} optional
-        Keyword arguments for ``cdf_to_xarray``. This function overrides the
-        ``cdf_to_xarray`` default keyword value `to_datetime=False` with
-        ``to_datetime=True`.
+    **kwargs : dict, optional
+        Keyword arguments for ``cdf_to_xarray``.

     Returns
     -------
     dataset : xarray.Dataset
         The ``xarray`` dataset for the CDF file.
     """
-    # TODO: remove this when cdflib is updated to version >1.3.0
-    if "to_datetime" not in kwargs:
-        kwargs["to_datetime"] = True
     dataset = cdf_to_xarray(file_path, kwargs)

     # cdf_to_xarray converts single-value attributes to lists
@@ -121,9 +126,9 @@ def write_cdf(dataset: xr.Dataset):
     # Create the filename from the global attributes
     # Logical_source looks like "imap_swe_l2_counts-1min"
     instrument, data_level, descriptor = dataset.attrs["Logical_source"].split("_")[1:]
-    start_time = np.datetime_as_string(dataset["epoch"].values[0], unit="D").replace(
-        "-", ""
-    )
+    # Convert J2000 epoch referenced data to datetime64
+    dt64 = J2000_EPOCH + dataset["epoch"].values[0].astype("timedelta64[ns]")
+    start_time = np.datetime_as_string(dt64, unit="D").replace("-", "")

     # Will now accept vXXX or XXX formats, as batch starter sends versions as vXXX.
     r = re.compile(r"v\d{3}")
@@ -159,7 +164,6 @@ def write_cdf(dataset: xr.Dataset):
     xarray_to_cdf(
         dataset,
         str(file_path),
-        datetime64_to_cdftt2000=True,
         terminate_on_warning=True,
     )  # Terminate if not ISTP compliant

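For filenames, `write_cdf` now goes the other way: int64 J2000 nanoseconds back to a calendar date. A round-trip sketch (not part of the diff) using the names from this file:

```python
import numpy as np

from imap_processing.cdf.utils import J2000_EPOCH, met_to_j2000ns

epoch_ns = int(met_to_j2000ns(0))  # MET 0 -> nanoseconds since J2000
dt64 = J2000_EPOCH + np.timedelta64(epoch_ns, "ns")
start_time = np.datetime_as_string(dt64, unit="D").replace("-", "")  # '20100101'
```
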
4 changes: 2 additions & 2 deletions imap_processing/cli.py
@@ -618,15 +618,15 @@ def do_processing(self, dependencies):
         elif self.data_level == "l1b":
             data_dict = {}
             for dependency in dependencies:
-                dataset = load_cdf(dependency, to_datetime=True)
+                dataset = load_cdf(dependency)
                 data_dict[dataset.attrs["Logical_source"]] = dataset
             dataset = lo_l1b.lo_l1b(data_dict, self.version)
             return [dataset]

         elif self.data_level == "l1c":
             data_dict = {}
             for dependency in dependencies:
-                dataset = load_cdf(dependency, to_datetime=True)
+                dataset = load_cdf(dependency)
                 data_dict[dataset.attrs["Logical_source"]] = dataset
             dataset = lo_l1c.lo_l1c(data_dict, self.version)
             return [dataset]
32 changes: 12 additions & 20 deletions imap_processing/codice/codice_l1a.py
@@ -22,7 +22,7 @@

 from imap_processing import imap_module_directory
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
-from imap_processing.cdf.utils import calc_start_time
+from imap_processing.cdf.utils import IMAP_EPOCH, met_to_j2000ns
 from imap_processing.codice import constants
 from imap_processing.codice.codice_l0 import decom_packets
 from imap_processing.codice.utils import CODICEAPID, create_hskp_dataset
@@ -78,18 +78,16 @@ def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int):
         self.plan_step = plan_step
         self.view_id = view_id

-    def create_science_dataset(
-        self, start_time: np.datetime64, data_version: str
-    ) -> xr.Dataset:
+    def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset:
         """
         Create an ``xarray`` dataset for the unpacked science data.

         The dataset can then be written to a CDF file.

         Parameters
         ----------
-        start_time : numpy.datetime64
-            The start time of the packet, used to determine epoch data variable.
+        met : numpy.int64
+            The mission elapsed time of the packet, used to determine epoch data.
         data_version : str
             Version of the data product being created.
@@ -106,10 +104,7 @@

         # Define coordinates
         epoch = xr.DataArray(
-            [
-                start_time,
-                start_time + np.timedelta64(1, "s"),
-            ],  # TODO: Fix after SIT-3 (see note below)
+            met_to_j2000ns(met),  # TODO: Fix after SIT-3 (see note below)
             name="epoch",
             dims=["epoch"],
             attrs=cdf_attrs.get_variable_attributes("epoch_attrs"),
@@ -380,11 +375,8 @@ def process_codice_l1a(file_path: Path | str, data_version: str) -> xr.Dataset:
         packets = sort_by_time(grouped_data[apid], "SHCOARSE")

         # Determine the start time of the packet
-        start_time = calc_start_time(
-            packets[0].data["ACQ_START_SECONDS"].raw_value,
-            launch_time=np.datetime64("2010-01-01T00:01:06.184", "ns"),
-        )
-
+        met = packets[0].data["ACQ_START_SECONDS"].raw_value
+        met = [met, met + 1]  # TODO: Remove after SIT-3
         # Extract the data
         science_values = packets[0].data["DATA"].raw_value

@@ -397,7 +389,7 @@ def process_codice_l1a(file_path: Path | str, data_version: str) -> xr.Dataset:
         pipeline.get_acquisition_times()
         pipeline.get_data_products(apid)
         pipeline.unpack_science_data(science_values)
-        dataset = pipeline.create_science_dataset(start_time, data_version)
+        dataset = pipeline.create_science_dataset(met, data_version)

         # TODO: Temporary workaround in order to create hi data products in absence
         # of simulated data. This is essentially the same process as is for
@@ -417,15 +409,15 @@ def process_codice_l1a(file_path: Path | str, data_version: str) -> xr.Dataset:
         apid = CODICEAPID.COD_HI_SECT_SPECIES_COUNTS
         table_id, plan_id, plan_step, view_id = (1, 0, 0, 6)

-        start_time = np.datetime64(
-            "2024-04-29T00:00:00", "ns"
-        )  # Using this to match the other data products
+        met0 = (np.datetime64("2024-04-29T00:00") - IMAP_EPOCH).astype("timedelta64[s]")
+        met0 = met0.astype(np.int64)
+        met = [met0, met0 + 1]  # Using this to match the other data products
         science_values = ""  # Currently don't have simulated data for this

         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.get_data_products(apid)
         pipeline.unpack_science_data(science_values)
-        dataset = pipeline.create_science_dataset(start_time, data_version)
+        dataset = pipeline.create_science_dataset(met, data_version)

         # Write dataset to CDF
         logger.info(f"\nFinal data product:\n{dataset}\n")
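
The workaround above fabricates a MET from a wall-clock timestamp; in isolation, the arithmetic is plain epoch subtraction (dates as in the diff):

```python
import numpy as np

IMAP_EPOCH = np.datetime64("2010-01-01T00:00:00", "ns")

# Seconds on the spacecraft clock at the chosen (simulated) acquisition time
met0 = (np.datetime64("2024-04-29T00:00") - IMAP_EPOCH).astype("timedelta64[s]")
met0 = met0.astype(np.int64)  # plain integer seconds, ready for met_to_j2000ns
```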
12 changes: 5 additions & 7 deletions imap_processing/codice/utils.py
@@ -13,7 +13,7 @@

 from imap_processing.cdf.global_attrs import ConstantCoordinates
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
-from imap_processing.cdf.utils import calc_start_time
+from imap_processing.cdf.utils import met_to_j2000ns


 class CODICEAPID(IntEnum):
@@ -129,12 +129,10 @@ def create_hskp_dataset(packets, data_version: str) -> xr.Dataset:

     # TODO: Is there a way to get the attrs from the YAML-based method?
     epoch = xr.DataArray(
-        [
-            calc_start_time(
-                item, launch_time=np.datetime64("2010-01-01T00:01:06.184", "ns")
-            )
-            for item in metadata_arrays["SHCOARSE"]
-        ],
+        met_to_j2000ns(
+            metadata_arrays["SHCOARSE"],
+            reference_epoch=np.datetime64("2010-01-01T00:01:06.184", "ns"),
+        ),
         name="epoch",
         dims=["epoch"],
         attrs=ConstantCoordinates.EPOCH,
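
This hunk is the pattern the commit message describes: a per-element list comprehension collapses into one array call. A small equivalence sketch with made-up SHCOARSE values (the non-default `reference_epoch` preserves the old `launch_time` baseline):

```python
import numpy as np

from imap_processing.cdf.utils import met_to_j2000ns

shcoarse = np.array([105, 205, 305])  # hypothetical housekeeping MET seconds
ref = np.datetime64("2010-01-01T00:01:06.184", "ns")

looped = np.array([met_to_j2000ns(s, reference_epoch=ref) for s in shcoarse])
vectorized = met_to_j2000ns(shcoarse, reference_epoch=ref)
assert np.array_equal(looped, vectorized)
```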
10 changes: 5 additions & 5 deletions imap_processing/glows/l1a/glows_l1a.py
@@ -8,7 +8,7 @@
 import xarray as xr

 from imap_processing.cdf.global_attrs import ConstantCoordinates
-from imap_processing.cdf.utils import calc_start_time
+from imap_processing.cdf.utils import J2000_EPOCH, met_to_j2000ns
 from imap_processing.glows import __version__, glows_cdf_attrs
 from imap_processing.glows.l0.decom_glows import decom_packets
 from imap_processing.glows.l0.glows_l0_data import DirectEventL0
@@ -46,7 +46,7 @@ def glows_l1a(packet_filepath: Path, data_version: str) -> list[xr.Dataset]:
         hist_l1a = HistogramL1A(hist)
         # Split by IMAP start time
         # TODO: Should this be MET?
-        hist_day = calc_start_time(hist.SEC).astype("datetime64[D]")
+        hist_day = (J2000_EPOCH + met_to_j2000ns(hist.SEC)).astype("datetime64[D]")
         hists_by_day[hist_day].append(hist_l1a)

     # Generate CDF files for each day
@@ -85,7 +85,7 @@ def process_de_l0(
     de_by_day = dict()

     for de in de_l0:
-        de_day = calc_start_time(de.MET).astype("datetime64[D]")
+        de_day = (J2000_EPOCH + met_to_j2000ns(de.MET)).astype("datetime64[D]")
         if de_day not in de_by_day:
             de_by_day[de_day] = [DirectEventL1A(de)]
         elif de.SEQ != 0:
@@ -163,7 +163,7 @@ def generate_de_dataset(

     for index, de in enumerate(de_l1a_list):
         # Set the timestamp to the first timestamp of the direct event list
-        epoch_time = calc_start_time(de.l0.MET).astype("datetime64[ns]")
+        epoch_time = met_to_j2000ns(de.l0.MET).astype("datetime64[ns]")

         # determine if the length of the direct_events numpy array is long enough,
         # and extend the direct_events length dimension if necessary.
@@ -332,7 +332,7 @@ def generate_histogram_dataset(

     for index, hist in enumerate(hist_l1a_list):
         # TODO: Should this be MET?
-        epoch_time = calc_start_time(hist.imap_start_time.to_seconds())
+        epoch_time = met_to_j2000ns(hist.imap_start_time.to_seconds())
         hist_data[index] = hist.histograms

         support_data["flags_set_onboard"].append(hist.flags["flags_set_onboard"])
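
GLOWS buckets products by day, so the int64 epoch goes back through `J2000_EPOCH` and is truncated to `datetime64[D]`. A sketch with a made-up MET; note that numpy adds a plain int64 to a nanosecond-precision `datetime64` as a count of nanoseconds:

```python
import numpy as np

from imap_processing.cdf.utils import J2000_EPOCH, met_to_j2000ns

met_seconds = 3 * 86400 + 12 * 3600  # hypothetical MET: 3.5 days after IMAP_EPOCH
day = (J2000_EPOCH + met_to_j2000ns(met_seconds)).astype("datetime64[D]")
# day -> numpy.datetime64('2010-01-04')
```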
6 changes: 2 additions & 4 deletions imap_processing/hi/l1a/histogram.py
@@ -7,7 +7,7 @@
 from space_packet_parser.parser import Packet

 from imap_processing.cdf.global_attrs import ConstantCoordinates
-from imap_processing.cdf.utils import calc_start_time
+from imap_processing.cdf.utils import met_to_j2000ns
 from imap_processing.hi import hi_cdf_attrs

 # TODO: Verify that these names are OK for counter variables in the CDF
@@ -57,9 +57,7 @@ def create_dataset(packets: list[Packet]) -> xr.Dataset:

     # unpack the packets data into the Dataset
     for i_epoch, packet in enumerate(packets):
-        dataset.epoch.data[i_epoch] = calc_start_time(
-            packet.data["CCSDS_MET"].raw_value
-        )
+        dataset.epoch.data[i_epoch] = met_to_j2000ns(packet.data["CCSDS_MET"].raw_value)
         dataset.ccsds_met[i_epoch] = packet.data["CCSDS_MET"].raw_value
         dataset.esa_step[i_epoch] = packet.data["ESA_STEP"].raw_value
24 changes: 3 additions & 21 deletions imap_processing/hi/l1a/science_direct_event.py
@@ -4,9 +4,10 @@
 import xarray as xr
 from space_packet_parser.parser import Packet

-from imap_processing import imap_module_directory, launch_time
+from imap_processing import imap_module_directory
 from imap_processing.cdf.cdf_attribute_manager import CdfAttributeManager
 from imap_processing.cdf.global_attrs import ConstantCoordinates
+from imap_processing.cdf.utils import met_to_j2000ns

 # TODO: read LOOKED_UP_DURATION_OF_TICK from
 # instrument status summary later. This value
@@ -20,25 +21,6 @@
 MICROSECOND_TO_NS = 1e3


-def get_direct_event_time(time_in_ns: int) -> np.datetime64:
-    """
-    Create MET(Mission Elapsed Time) time using input times.
-
-    Parameters
-    ----------
-    time_in_ns : int
-        Time in nanoseconds.
-
-    Returns
-    -------
-    met_datetime : numpy.datetime64
-        Human-readable MET time.
-    """
-    met_datetime = launch_time + np.timedelta64(int(time_in_ns), "ns")
-
-    return met_datetime
-
-
 def parse_direct_event(event_data: str) -> dict:
     """
     Parse event data.
@@ -267,7 +249,7 @@ def create_dataset(de_data_list: list, packet_met_time: list) -> xr.Dataset:
                 + event["de_tag"] * LOOKED_UP_DURATION_OF_TICK * MICROSECOND_TO_NS
             )
             data_dict["event_met"].append(de_met_in_ns)
-            data_dict["epoch"].append(get_direct_event_time(de_met_in_ns))
+            data_dict["epoch"].append(met_to_j2000ns(de_met_in_ns / 1e9))
            data_dict["esa_stepping_num"].append(current_esa_step)
             # start_bitmask_data is 1, 2, 3 for detector A, B, C
             # respectively. This is used to identify which detector
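
Note the unit round trip in this hunk: `de_met_in_ns` is already nanoseconds, so it is divided back to seconds before `met_to_j2000ns` multiplies by 1e9 again. A sketch with hypothetical values; `LOOKED_UP_DURATION_OF_TICK` is a placeholder here, the real value is read from the instrument status summary:

```python
from imap_processing.cdf.utils import met_to_j2000ns

MICROSECOND_TO_NS = 1e3
LOOKED_UP_DURATION_OF_TICK = 3999  # microseconds; assumed placeholder value

met_seconds = 12345  # hypothetical whole seconds from the packet
de_tag = 7           # hypothetical sub-second tick counter

de_met_in_ns = (
    met_seconds * 1e9
    + de_tag * LOOKED_UP_DURATION_OF_TICK * MICROSECOND_TO_NS
)
epoch_ns = met_to_j2000ns(de_met_in_ns / 1e9)  # back to seconds for the converter
```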