Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* Updating CLI to provide more flexible API for generating files, updating CDF generation to move file name creation into imap-data-access
  • Loading branch information
maxinelasp authored Feb 23, 2024
1 parent 96e87c5 commit c8c3a39
Show file tree
Hide file tree
Showing 13 changed files with 594 additions and 499 deletions.
3 changes: 2 additions & 1 deletion imap_processing/cdf/global_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ class AttrBase:
scale_type : str, default="linear"
The scale of the axis, "SCALETYP" attribute
label_axis : str, default=None
Axis label, "LABLAXIS" attribute
Axis label, "LABLAXIS" attribute. Required. Should be close to 6 letters.
format : str, default=None
The format of the data, in Fortran format
units : str, default=None
Expand Down Expand Up @@ -259,6 +259,7 @@ class ScienceAttrs(AttrBase):
depend_0 : str = None
The first degree of dependent coordinate variables.
Although this is an optional keyword, it is required for every instance.
This should be the "Epoch" dimension, and should be type datetime64.
depend_1 : str = None, optional
The second degree of dependent coordinate variables. This is used for 2d data.
depend_2 : str = None, optional
Expand Down
74 changes: 11 additions & 63 deletions imap_processing/cdf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

import logging
from pathlib import Path
from typing import Optional

import imap_data_access
import numpy as np
import xarray as xr
from cdflib.xarray import xarray_to_cdf
Expand All @@ -26,7 +24,7 @@ def calc_start_time(shcoarse_time: int):
np.datetime64
The time of the event
TODO
TODO - move this into imap-data-access? How should it be used?
-----
This conversion is temporary for now, and will need SPICE in the future.
Nick Dutton mentioned that s/c clock start epoch is
Expand All @@ -38,15 +36,11 @@ def calc_start_time(shcoarse_time: int):
return launch_time + np.timedelta64(shcoarse_time, "s")


def write_cdf(
data: xr.Dataset,
descriptor: str,
directory: Optional[Path] = None,
):
def write_cdf(data: xr.Dataset, filepath: Path):
"""Write the contents of "data" to a CDF file using cdflib.xarray_to_cdf.
This function determines the file name to use from the global attributes,
fills in the the final attributes, and converts the whole dataset to a CDF.
fills in the final attributes, and converts the whole dataset to a CDF.
The date in the file name is determined by the time of the first Epoch in the
xarray Dataset. The first 3 file name fields (mission, instrument, level) are
determined by the "Logical_source" attribute. The version is determined from
Expand All @@ -56,74 +50,28 @@ def write_cdf(
----------
data : xarray.Dataset
The dataset object to convert to a CDF
descriptor : str
The descriptor to insert into the file name after the
orbit, before the SPICE field. No underscores allowed.
directory : pathlib.Path, optional
The directory to write the file to. The default is obtained
from the global imap_data_access.config["DATA_DIR"].
filepath : pathlib.Path
The output path, including filename, to write the CDF to.
Returns
-------
pathlib.Path
Path to the file created
"""
# Determine the start date of the data in the file,
# based on the time of the first dust impact
file_start_date = None
if "idex" in data.attrs["Logical_source"]:
file_start_date = data["Epoch"][0].data
else:
start_time = data["Epoch"].data[0]
file_start_date = calc_start_time(start_time)
if file_start_date is None:
raise ValueError(
"Unable to determine file start date. Check Logical_source value"
)

date_string = np.datetime_as_string(file_start_date, unit="D").replace("-", "")

# Determine the file name based on the attributes in the xarray
# Set file name based on this convention:
# imap_<instrument>_<datalevel>_<descriptor>_<startdate>_<enddate>_<version>.cdf
# data.attrs["Logical_source"] has the mission, instrument, and level
# like this:
# imap_idex_l1
# TODO: add logic for adding the end date
filename = (
f"{data.attrs['Logical_source']}"
f"_{descriptor}"
f"_{date_string}"
f"_{date_string}"
f"_v{data.attrs['Data_version']}.cdf"
)

if directory is None:
# Storage directory
# mission/instrument/data_level/year/month/filename
# /<directory | DATA_DIR>/<instrument>/<data_level>/<year>/<month>
_, instrument, data_level = data.attrs["Logical_source"].split("_")
directory = imap_data_access.config["DATA_DIR"] / instrument / data_level
directory /= date_string[:4]
directory /= date_string[4:6]
filename_and_path = Path(directory)
if not filename_and_path.exists():
logging.info(
"The directory does not exist, creating directory %s", filename_and_path
)
filename_and_path.mkdir(parents=True)
filename_and_path /= filename
if not filepath.parent.exists():
logging.info("The directory does not exist, creating directory %s", filepath)
filepath.parent.mkdir(parents=True)

# Insert the final attribute:
# The Logical_file_id is always the name of the file without the extension
data.attrs["Logical_file_id"] = filename.split(".")[0]
data.attrs["Logical_file_id"] = filepath.stem

# Convert the xarray object to a CDF
xarray_to_cdf(
data,
filename_and_path,
str(filepath),
datetime64_to_cdftt2000=True,
terminate_on_warning=True,
) # Terminate if not ISTP compliant

return filename_and_path
return filepath
Loading

0 comments on commit c8c3a39

Please sign in to comment.