diff --git a/imap_processing/cdf/utils.py b/imap_processing/cdf/utils.py index c854c71b6..212414db6 100644 --- a/imap_processing/cdf/utils.py +++ b/imap_processing/cdf/utils.py @@ -1,11 +1,15 @@ """Various utility functions to support creation of CDF files.""" -import os +import logging +from pathlib import Path +from typing import Optional import numpy as np import xarray as xr from cdflib.xarray import xarray_to_cdf +import imap_processing + def calc_start_time(shcoarse_time: int): """Calculate the datetime64 from the CCSDS secondary header information. @@ -36,7 +40,10 @@ def calc_start_time(shcoarse_time: int): def write_cdf( - data: xr.Dataset, description: str = "", mode: str = "", directory: str = "" + data: xr.Dataset, + description: str = "", + mode: str = "", + directory: Optional[Path] = None, ): """Write the contents of "data" to a CDF file using cdflib.xarray_to_cdf. @@ -57,8 +64,8 @@ def write_cdf( Returns ------- - str - The name of the file created + Path + Path to the file created """ # Determine the start date of the data in the file, # based on the time of the first dust impact @@ -98,7 +105,21 @@ def write_cdf( + date_string + f"_v{data.attrs['Data_version']}.cdf" ) - filename_and_path = os.path.join(directory, filename) + + if directory is None: + # Storage directory + # mission/instrument/data_level/year/month/filename + # <DATA_DIR>/<instrument>/<data_level>/<year>/<month>/<filename> + _, instrument, data_level = data.attrs["Logical_source"].split("_") + directory = imap_processing.DATA_DIR / instrument / data_level + directory = directory / date_string[:4] / date_string[4:6] + filename_and_path = Path(directory) + if not filename_and_path.exists(): + logging.info( + "The directory does not exist, creating directory %s", filename_and_path + ) + filename_and_path.mkdir(parents=True, exist_ok=True) + filename_and_path /= filename # Insert the final attribute: # The Logical_file_id is always the name of the file without the extension diff --git a/imap_processing/idex/tests/test_l1_cdfs.py 
b/imap_processing/idex/tests/test_l1_cdfs.py index afe9f213f..dc61620f2 100644 --- a/imap_processing/idex/tests/test_l1_cdfs.py +++ b/imap_processing/idex/tests/test_l1_cdfs.py @@ -1,6 +1,3 @@ -import os -from pathlib import Path - import numpy as np import pytest import xarray as xr @@ -26,11 +23,11 @@ def test_idex_cdf_file(decom_test_data, temp_path): # Verify that a CDF file can be created with no errors thrown by xarray_to_cdf file_name = write_cdf(decom_test_data.data, description="", directory=temp_path) date_to_test = "20250724" - assert file_name == os.path.join( - temp_path, - f"{decom_test_data.data.attrs['Logical_source']}_{date_to_test}_v{idex.__version__}.cdf", + assert file_name.name == ( + f"{decom_test_data.data.attrs['Logical_source']}_" + f"{date_to_test}_v{idex.__version__}.cdf" ) - assert Path(file_name).exists() + assert file_name.exists() def test_bad_cdf_attributes(decom_test_data, temp_path): @@ -75,11 +72,11 @@ def test_descriptor_in_file_name(decom_test_data, temp_path): decom_test_data.data, description="impact-lab-test001", directory=temp_path ) date_to_test = "20250724" - assert file_name == os.path.join( - temp_path, - f"{decom_test_data.data.attrs['Logical_source']}_impact-lab-test001_{date_to_test}_v{idex.__version__}.cdf", + assert file_name.name == ( + f"{decom_test_data.data.attrs['Logical_source']}_" + f"impact-lab-test001_{date_to_test}_v{idex.__version__}.cdf" ) - assert Path(file_name).exists() + assert file_name.exists() def test_idex_tof_high_data_from_cdf(decom_test_data, temp_path): diff --git a/imap_processing/swe/l1a/swe_l1a.py b/imap_processing/swe/l1a/swe_l1a.py index 70c1a0cae..467f03714 100644 --- a/imap_processing/swe/l1a/swe_l1a.py +++ b/imap_processing/swe/l1a/swe_l1a.py @@ -28,8 +28,8 @@ def swe_l1a(packets, cdf_filepath): Returns ------- - str - Path name of where CDF file was created. + pathlib.Path + Path to where the CDF file was created. This is used to upload file from local to s3. TODO: test this later. 
""" diff --git a/imap_processing/swe/l1b/swe_l1b.py b/imap_processing/swe/l1b/swe_l1b.py index 118eccdb9..5a01bac6e 100644 --- a/imap_processing/swe/l1b/swe_l1b.py +++ b/imap_processing/swe/l1b/swe_l1b.py @@ -22,7 +22,7 @@ def swe_l1b(l1a_dataset: xr.Dataset, cdf_filepath: str): Returns ------- - str + pathlib.Path Path to the L1B file. Raises diff --git a/imap_processing/swe/tests/test_swe_l1a.py b/imap_processing/swe/tests/test_swe_l1a.py index 85c2c7e1d..a270bdfe5 100644 --- a/imap_processing/swe/tests/test_swe_l1a.py +++ b/imap_processing/swe/tests/test_swe_l1a.py @@ -1,5 +1,3 @@ -import os - import pytest from imap_processing import imap_module_directory @@ -53,5 +51,5 @@ def test_cdf_creation(decom_test_data, tmp_path): grouped_data = group_by_apid(decom_test_data) sci_cdf_filepath = swe_l1a(grouped_data[SWEAPID.SWE_SCIENCE], cdf_filepath) hk_cdf_filepath = swe_l1a(grouped_data[SWEAPID.SWE_APP_HK], cdf_filepath) - assert os.path.basename(sci_cdf_filepath) == "imap_swe_l1a_sci_20230927_v01.cdf" - assert os.path.basename(hk_cdf_filepath) == "imap_swe_l1a_lveng_hk_20230927_v01.cdf" + assert sci_cdf_filepath.name == "imap_swe_l1a_sci_20230927_v01.cdf" + assert hk_cdf_filepath.name == "imap_swe_l1a_lveng_hk_20230927_v01.cdf" diff --git a/imap_processing/swe/tests/test_swe_l1b.py b/imap_processing/swe/tests/test_swe_l1b.py index fcf9855bf..99d169fa8 100644 --- a/imap_processing/swe/tests/test_swe_l1b.py +++ b/imap_processing/swe/tests/test_swe_l1b.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pandas as pd import pytest from cdflib.xarray import cdf_to_xarray @@ -154,5 +152,5 @@ def test_cdf_creation(decom_test_data, l1a_test_data, tmp_path): l1a_dataset = cdf_to_xarray(hk_l1a_filepath) hk_l1b_filepath = swe_l1b(l1a_dataset, cdf_filepath) - assert Path(hk_l1b_filepath).name == "imap_swe_l1b_lveng_hk_20230927_v01.cdf" - assert Path(sci_l1b_filepath).name == "imap_swe_l1b_sci_20230927_v01.cdf" + assert hk_l1b_filepath.name == 
"imap_swe_l1b_lveng_hk_20230927_v01.cdf" + assert sci_l1b_filepath.name == "imap_swe_l1b_sci_20230927_v01.cdf" diff --git a/pyproject.toml b/pyproject.toml index 065426149..156ced512 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ select = ["B", "D", "E", "F", "I", "N", "S", "W", "PL", "PT", "UP", "RUF"] ignore = ["D104", "PLR2004", "S101"] [tool.ruff.per-file-ignores] -# S603: unchecked input in subprocess call is fine in our tests +# S603 unchecked input in subprocess call is fine in our tests "*/tests/*" = ["D", "S603"] "tools/xtce*" = ["D"] # TODO: Too many statements, this could be refactored and removed