diff --git a/imap_processing/cdf/utils.py b/imap_processing/cdf/utils.py
index b8bf155cf..6dc198113 100644
--- a/imap_processing/cdf/utils.py
+++ b/imap_processing/cdf/utils.py
@@ -2,11 +2,12 @@
 
 import logging
 import re
+from pathlib import Path
 
 import imap_data_access
 import numpy as np
 import xarray as xr
-from cdflib.xarray import xarray_to_cdf
+from cdflib.xarray import cdf_to_xarray, xarray_to_cdf
 
 from imap_processing import launch_time
 
@@ -41,6 +42,33 @@ def calc_start_time(shcoarse_time: float) -> np.datetime64:
     return launch_time + time_delta
 
 
+def load_cdf(file_path: Path, **kwargs: dict) -> xr.Dataset:
+    """Load the contents of a CDF file into an ``xarray`` dataset.
+
+    Parameters
+    ----------
+    file_path : Path
+        The path to the CDF file.
+    **kwargs : dict, optional
+        Keyword arguments forwarded by name to ``cdf_to_xarray``.
+
+    Returns
+    -------
+    dataset : xr.Dataset
+        The ``xarray`` dataset for the CDF file.
+    """
+    dataset = cdf_to_xarray(file_path, **kwargs)
+
+    # cdf_to_xarray converts single-value attributes to lists;
+    # convert these back to single values where applicable.
+    for attribute in dataset.attrs:
+        value = dataset.attrs[attribute]
+        if isinstance(value, list) and len(value) == 1:
+            dataset.attrs[attribute] = value[0]
+
+    return dataset
+
+
 def write_cdf(dataset: xr.Dataset):
     """Write the contents of "data" to a CDF file using cdflib.xarray_to_cdf.
 
@@ -53,13 +81,13 @@ def write_cdf(dataset: xr.Dataset): Parameters ---------- - dataset : xarray.Dataset - The dataset object to convert to a CDF + dataset : xarray.Dataset + The dataset object to convert to a CDF Returns ------- - pathlib.Path - Path to the file created + file_path: pathlib.Path + Path to the file created """ # Create the filename from the global attributes # Logical_source looks like "imap_swe_l2_counts-1min" diff --git a/imap_processing/cli.py b/imap_processing/cli.py index aface71d9..af7bbd262 100644 --- a/imap_processing/cli.py +++ b/imap_processing/cli.py @@ -19,7 +19,6 @@ from urllib.error import HTTPError import imap_data_access -from cdflib.xarray import cdf_to_xarray import imap_processing @@ -31,7 +30,7 @@ # from imap_processing import cdf # In code: # call cdf.utils.write_cdf -from imap_processing.cdf.utils import write_cdf +from imap_processing.cdf.utils import load_cdf, write_cdf from imap_processing.mag.l1a.mag_l1a import mag_l1a from imap_processing.swe.l1a.swe_l1a import swe_l1a from imap_processing.swe.l1b.swe_l1b import swe_l1b @@ -352,7 +351,7 @@ def process(self): elif self.data_level == "l1b": # read CDF file - l1a_dataset = cdf_to_xarray(dependencies[0]) + l1a_dataset = load_cdf(dependencies[0]) processed_data = swe_l1b(l1a_dataset) # TODO: Pass in the proper version and descriptor cdf_file_path = write_cdf(data=processed_data) diff --git a/imap_processing/tests/cdf/test_utils.py b/imap_processing/tests/cdf/test_utils.py index 1c2dc01b1..459dfe13a 100644 --- a/imap_processing/tests/cdf/test_utils.py +++ b/imap_processing/tests/cdf/test_utils.py @@ -1,21 +1,26 @@ +"""Tests for the ``cdf.utils`` module.""" + import imap_data_access import numpy as np +import pytest import xarray as xr from imap_processing import launch_time from imap_processing.cdf.global_attrs import ConstantCoordinates -from imap_processing.cdf.utils import calc_start_time, write_cdf +from imap_processing.cdf.utils import calc_start_time, load_cdf, write_cdf 
from imap_processing.swe.swe_cdf_attrs import swe_l1a_global_attrs -def test_calc_start_time(): - assert calc_start_time(0) == launch_time - assert calc_start_time(1) == launch_time + np.timedelta64(1, "s") +@pytest.fixture() +def test_dataset(): + """Create a simple ``xarray`` dataset to be used in testing. + Returns + ------- + dataset : xarray.Dataset + The ``xarray`` dataset object + """ -def test_write_cdf(): - # Set up a fake dataset - # lots of requirements on attributes, so depend on SWE for now dataset = xr.Dataset( { "epoch": ( @@ -28,11 +33,63 @@ def test_write_cdf(): ) }, attrs=swe_l1a_global_attrs.output() - | {"Logical_source": "imap_swe_l1_sci", "Data_version": "001"}, + | { + "Logical_source": "imap_swe_l1_sci", + "Data_version": "001", + "Logical_file_id": "imap_swe_l1_sci_20100101_v001", + }, ) dataset["epoch"].attrs = ConstantCoordinates.EPOCH + dataset["epoch"].attrs["DEPEND_0"] = "epoch" + + return dataset + + +def test_calc_start_time(): + """Tests the ``calc_start_time`` function.""" + + assert calc_start_time(0) == launch_time + assert calc_start_time(1) == launch_time + np.timedelta64(1, "s") - file_path = write_cdf(dataset) + + +def test_load_cdf(test_dataset): + """Tests the ``load_cdf`` function.""" + + # Write the dataset to a CDF to be used to test the load function + file_path = write_cdf(test_dataset) + + # Load the CDF and ensure the function returns a dataset + dataset = load_cdf(file_path) + assert isinstance(dataset, xr.core.dataset.Dataset) + + +def test_write_cdf(test_dataset): + """Tests the ``write_cdf`` function. 
+ + Parameters + ---------- + test_dataset : xarray.Dataset + An ``xarray`` dataset object to test with + """ + + file_path = write_cdf(test_dataset) assert file_path.exists() assert file_path.name == "imap_swe_l1_sci_20100101_v001.cdf" assert file_path.relative_to(imap_data_access.config["DATA_DIR"]) + + +def test_written_and_loaded_dataset(test_dataset): + """Tests that a dataset that is written to CDF and then loaded results in + the original dataset. + + Parameters + ---------- + test_dataset : xarray.Dataset + An ``xarray`` dataset object to test with + """ + + new_dataset = load_cdf(write_cdf(test_dataset), to_datetime=True) + new_dataset.attrs["PI_affiliation"] = tuple( + new_dataset.attrs["PI_affiliation"] + ) # The PI_affiliation attribute should be a tuple + assert str(test_dataset) == str(new_dataset) diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 5b9c638a6..aa9b80eff 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -2,12 +2,12 @@ from pathlib import Path -import cdflib import numpy as np import pytest import xarray as xr from imap_processing import imap_module_directory +from imap_processing.cdf.utils import load_cdf from imap_processing.codice.codice_l0 import decom_packets from imap_processing.codice.codice_l1a import process_codice_l1a @@ -140,7 +140,7 @@ def test_l1a_data_array_values(test_l1a_data: xr.Dataset, validation_data: Path) """ generated_dataset = test_l1a_data - validation_dataset = cdflib.xarray.cdf_to_xarray(validation_data) + validation_dataset = load_cdf(validation_data) # Ensure the processed data matches the validation data for variable in validation_dataset: diff --git a/imap_processing/tests/idex/test_l1_cdfs.py b/imap_processing/tests/idex/test_l1_cdfs.py index f659c0c92..82042d3c6 100644 --- a/imap_processing/tests/idex/test_l1_cdfs.py +++ b/imap_processing/tests/idex/test_l1_cdfs.py @@ -5,11 +5,10 @@ 
import numpy as np import pytest import xarray as xr -from cdflib.xarray import cdf_to_xarray from cdflib.xarray.xarray_to_cdf import ISTPError from imap_processing import imap_module_directory -from imap_processing.cdf.utils import write_cdf +from imap_processing.cdf.utils import load_cdf, write_cdf from imap_processing.idex.idex_packet_parser import PacketParser @@ -74,7 +73,5 @@ def test_idex_tof_high_data_from_cdf(decom_test_data): data = np.array([int(line.rstrip()) for line in f]) file_name = write_cdf(decom_test_data) - l1_data = cdf_to_xarray( - file_name - ) # Read in the data from the CDF file to an xarray object + l1_data = load_cdf(file_name) assert (l1_data["TOF_High"][13].data == data).all() diff --git a/imap_processing/tests/mag/test_mag_decom.py b/imap_processing/tests/mag/test_mag_decom.py index f02c13e6a..e2217275e 100644 --- a/imap_processing/tests/mag/test_mag_decom.py +++ b/imap_processing/tests/mag/test_mag_decom.py @@ -1,10 +1,9 @@ from pathlib import Path import pandas as pd -from cdflib.xarray import cdf_to_xarray from imap_processing.cdf import global_attrs -from imap_processing.cdf.utils import write_cdf +from imap_processing.cdf.utils import load_cdf, write_cdf from imap_processing.mag import mag_cdf_attrs from imap_processing.mag.l0.decom_mag import decom_packets, generate_dataset @@ -91,12 +90,12 @@ def test_mag_raw_cdf_generation(): assert output.exists() assert output.name == "imap_mag_l1a_norm-raw_20231025_v001.cdf" - input_xarray = cdf_to_xarray(output) + input_xarray = load_cdf(output) assert input_xarray.attrs.keys() == norm_data.attrs.keys() output = write_cdf(burst_data) assert output.exists() assert output.name == "imap_mag_l1a_burst-raw_20231025_v001.cdf" - input_xarray = cdf_to_xarray(output) + input_xarray = load_cdf(output) assert input_xarray.attrs.keys() == burst_data.attrs.keys() diff --git a/imap_processing/tests/swe/test_swe_l1b.py b/imap_processing/tests/swe/test_swe_l1b.py index 2effd5f3e..46cdac570 100644 --- 
a/imap_processing/tests/swe/test_swe_l1b.py +++ b/imap_processing/tests/swe/test_swe_l1b.py @@ -1,9 +1,8 @@ import pandas as pd import pytest -from cdflib.xarray import cdf_to_xarray from imap_processing import imap_module_directory -from imap_processing.cdf.utils import write_cdf +from imap_processing.cdf.utils import load_cdf, write_cdf from imap_processing.swe.l0 import decom_swe from imap_processing.swe.l1a.swe_l1a import swe_l1a from imap_processing.swe.l1a.swe_science import swe_science @@ -132,7 +131,7 @@ def test_cdf_creation(): assert hk_l1a_filepath.name == "imap_swe_l1a_sci_20230927_v001.cdf" # reads data from CDF file and passes to l1b - l1a_cdf_dataset = cdf_to_xarray(hk_l1a_filepath, to_datetime=True) + l1a_cdf_dataset = load_cdf(hk_l1a_filepath, to_datetime=True) l1b_dataset = swe_l1b(l1a_cdf_dataset) hk_l1b_filepath = write_cdf(l1b_dataset) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1a.py b/imap_processing/tests/ultra/unit/test_ultra_l1a.py index 0c4be375e..8e638f93c 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1a.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1a.py @@ -1,8 +1,8 @@ import dataclasses import pytest -from cdflib.xarray import cdf_to_xarray +from imap_processing.cdf.utils import load_cdf from imap_processing.ultra import ultra_cdf_attrs from imap_processing.ultra.l0.decom_ultra import decom_ultra_apids from imap_processing.ultra.l0.ultra_utils import ( @@ -185,7 +185,7 @@ def test_cdf_aux( assert test_data_path.name == "imap_ultra_l1a_sci_20220530_v001.cdf" dataset_aux = create_dataset({ULTRA_AUX.apid[0]: decom_ultra_aux}) - input_xarray_aux = cdf_to_xarray(test_data_path) + input_xarray_aux = load_cdf(test_data_path) assert input_xarray_aux.attrs.keys() == dataset_aux.attrs.keys() @@ -203,7 +203,7 @@ def test_cdf_rates( assert test_data_path.name == "imap_ultra_l1a_sci_20220530_v001.cdf" dataset_rates = create_dataset({ULTRA_RATES.apid[0]: decom_ultra_rates}) - input_xarray_rates = 
cdf_to_xarray(test_data_path) + input_xarray_rates = load_cdf(test_data_path) assert input_xarray_rates.attrs.keys() == dataset_rates.attrs.keys() @@ -219,7 +219,7 @@ def test_cdf_tof( assert test_data_path.name == "imap_ultra_l1a_sci_20240124_v001.cdf" dataset_tof = create_dataset({ULTRA_TOF.apid[0]: decom_ultra_tof}) - input_xarray_tof = cdf_to_xarray(test_data_path) + input_xarray_tof = load_cdf(test_data_path) assert input_xarray_tof.attrs.keys() == dataset_tof.attrs.keys() @@ -243,6 +243,6 @@ def test_cdf_events( dataset_events = create_dataset( {ULTRA_EVENTS.apid[0]: decom_ultra_events, ULTRA_AUX.apid[0]: decom_ultra_aux} ) - input_xarray_events = cdf_to_xarray(test_data_path) + input_xarray_events = load_cdf(test_data_path) assert input_xarray_events.attrs.keys() == dataset_events.attrs.keys()