IMAP-Science-Operations-Center · anamanica · Jul 1, 2024 · Jun 13, 2024 · Jun 13, 2024 · Jun 14, 2024
diff --git a/imap_processing/cdf/cdf_attribute_manager.py b/imap_processing/cdf/cdf_attribute_manager.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import logging
 from pathlib import Path
 
 import yaml
@@ -75,10 +76,10 @@ def __init__(self, data_dir: Path):
         self.variable_attribute_schema = self._load_default_variable_attr_schema()
 
         # Load Default IMAP Global Attributes
-        self.global_attributes = CdfAttributeManager._load_yaml_data(
+        self._global_attributes = CdfAttributeManager._load_yaml_data(
             self.source_dir / DEFAULT_GLOBAL_CDF_ATTRS_FILE
         )
-        self.variable_attributes = dict()
+        self._variable_attributes = dict()
 
     def _load_default_global_attr_schema(self) -> dict:
         """
@@ -123,7 +124,7 @@ def load_global_attributes(self, file_path: str):
         file_path : str
             File path to load, under self.source_dir.
         """
-        self.global_attributes.update(
+        self._global_attributes.update(
             CdfAttributeManager._load_yaml_data(self.source_dir / file_path)
         )
 
@@ -145,7 +146,7 @@ def add_global_attribute(self, attribute_name: str, attribute_value: str) -> Non
         attribute_value : str
             The value of the attribute to add.
         """
-        self.global_attributes[attribute_name] = attribute_value
+        self._global_attributes[attribute_name] = attribute_value
 
     @staticmethod
     def _load_yaml_data(file_path: str | Path) -> dict:
@@ -191,15 +192,15 @@ def get_global_attributes(self, instrument_id: str | None = None) -> dict:
         """
         output = dict()
         for attr_name, attr_schema in self.global_attribute_schema.items():
-            if attr_name in self.global_attributes:
-                output[attr_name] = self.global_attributes[attr_name]
+            if attr_name in self._global_attributes:
+                output[attr_name] = self._global_attributes[attr_name]
             # Retrieve instrument specific global attributes from the variable file
             elif (
                 instrument_id is not None
-                and attr_name in self.global_attributes[instrument_id]
+                and attr_name in self._global_attributes[instrument_id]
             ):
-                output[attr_name] = self.global_attributes[instrument_id][attr_name]
-            elif attr_schema["required"] and attr_name not in self.global_attributes:
+                output[attr_name] = self._global_attributes[instrument_id][attr_name]
+            elif attr_schema["required"] and attr_name not in self._global_attributes:
                 # TODO throw an error
                 output[attr_name] = None
 
@@ -219,9 +220,9 @@ def load_variable_attributes(self, file_name: str) -> None:
         raw_var_attrs = CdfAttributeManager._load_yaml_data(self.source_dir / file_name)
         var_attrs = raw_var_attrs.copy()
 
-        self.variable_attributes.update(var_attrs)
+        self._variable_attributes.update(var_attrs)
 
-    def get_variable_attributes(self, variable_name: str) -> dict:
+    def get_variable_attributes(self, variable_name: str, check_schema=True) -> dict:
         """
         Get the attributes for a given variable name.
 
@@ -233,13 +234,81 @@ def get_variable_attributes(self, variable_name: str) -> dict:
         variable_name : str
             The name of the variable to retrieve attributes for.
 
+        check_schema : bool
+            If False, skip schema validation and return the stored attributes as-is.
+
         Returns
         -------
         dict
-            I have no idea todo check.
+            Information containing specific variable attributes
+            associated with "variable_name".
         """
-        # TODO: Create a variable attribute schema file, validate here
-        if variable_name in self.variable_attributes:
-            return self.variable_attributes[variable_name]
-        # TODO: throw an error?
-        return {}
+        # Case to handle attributes not in schema
+        if check_schema is False:
+            if variable_name in self._variable_attributes:
+                return self._variable_attributes[variable_name]
+            # TODO: throw an error?
+            return {}
+
+        output = dict()
+        for attr_name in self.variable_attribute_schema["attribute_key"]:
+            # Standard case
+            if attr_name in self._variable_attributes[variable_name]:
+                output[attr_name] = self._variable_attributes[variable_name][attr_name]
+            # Case to handle DEPEND_i schema issues
+            elif attr_name == "DEPEND_i":
+                # DEPEND_0 is not required, UNLESS we are dealing with
+                # variable_name = epoch
+                # Find all the attributes of variable_name that contain "DEPEND"
+                variable_depend_attrs = [
+                    key
+                    for key in self._variable_attributes[variable_name].keys()
+                    if "DEPEND" in key
+                ]
+                # Confirm that each DEPEND_i attribute is unique
+                if len(set(variable_depend_attrs)) != len(variable_depend_attrs):
+                    logging.warning(
+                        f"Found duplicate DEPEND_i attribute in variable "
+                        f"{variable_name}: {variable_depend_attrs}"
+                    )
+                for variable_depend_attr in variable_depend_attrs:
+                    output[variable_depend_attr] = self._variable_attributes[
+                        variable_name
+                    ][variable_depend_attr]
+                # TODO: Add more DEPEND_0 variable checks!
+            # Case to handle LABL_PTR_i schema issues
+            elif attr_name == "LABL_PTR_i":
+                # Find all the attributes of variable_name that contain "LABL_PTR"
+                variable_labl_attrs = [
+                    key
+                    for key in self._variable_attributes[variable_name].keys()
+                    if "LABL_PTR" in key
+                ]
+                for variable_labl_attr in variable_labl_attrs:
+                    output[variable_labl_attr] = self._variable_attributes[
+                        variable_name
+                    ][variable_labl_attr]
+            # Case to handle REPRESENTATION_i schema issues
+            elif attr_name == "REPRESENTATION_i":
+                # Find all the attributes of variable_name that contain
+                # "REPRESENTATION"
+                variable_rep_attrs = [
+                    key
+                    for key in self._variable_attributes[variable_name].keys()
+                    if "REPRESENTATION" in key
+                ]
+                for variable_rep_attr in variable_rep_attrs:
+                    output[variable_rep_attr] = self._variable_attributes[
+                        variable_name
+                    ][variable_rep_attr]
+            # Validating required schema
+            elif (
+                self.variable_attribute_schema["attribute_key"][attr_name]["required"]
+                and attr_name not in self._variable_attributes[variable_name]
+            ):
+                logging.warning(
+                    "Required schema '" + attr_name + "' attribute not present"
+                )
+                output[attr_name] = ""
+
+        return output
diff --git a/imap_processing/cdf/config/shared/default_variable_cdf_attrs_schema.yaml b/imap_processing/cdf/config/shared/default_variable_cdf_attrs_schema.yaml
@@ -120,7 +120,7 @@ attribute_key:
       Explicitly ties a data variable to the time variable on which it depends. All variables
       which change with time must have a DEPEND_0 attribute defined. See section 5.2.1
       which specifies the HERMES usage of DEPEND_0.
-    required: true
+    required: false
     overwrite: false
     valid_values: null
     alternate: null

diff --git a/imap_processing/cdf/tests/__init__.py b/imap_processing/cdf/tests/__init__.py
diff --git a/imap_processing/codice/codice_l1b.py b/imap_processing/codice/codice_l1b.py
@@ -61,7 +61,7 @@ def create_hskp_dataset(l1a_dataset, cdf_attrs) -> xr.Dataset:
         #      catdesc = value.short_description
         # I am holding off making this change until I acquire updated housekeeping
         # packets/validation data that match the latest telemetry definitions
-        attrs = cdf_attrs.variable_attributes["codice_support_attrs"]
+        attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
         attrs["CATDESC"] = "TBD"
         attrs["DEPEND_0"] = "epoch"
         attrs["FIELDNAM"] = "TBD"

diff --git a/imap_processing/codice/utils.py b/imap_processing/codice/utils.py
@@ -153,7 +153,7 @@ def create_hskp_dataset(packets, data_version: str) -> xr.Dataset:
     # packets/validation data that match the latest telemetry definitions
     # I may also be able to replace this function with utils.create_dataset(?)
     for key, value in metadata_arrays.items():
-        attrs = cdf_attrs.variable_attributes["codice_support_attrs"]
+        attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
         attrs["CATDESC"] = "TBD"
         attrs["DEPEND_0"] = "epoch"
         attrs["FIELDNAM"] = "TBD"

diff --git a/imap_processing/hi/l1a/science_direct_event.py b/imap_processing/hi/l1a/science_direct_event.py
@@ -284,7 +284,7 @@ def create_dataset(de_data_list: list, packet_met_time: list) -> xr.Dataset:
     )
 
     for var_name, data in data_dict.items():
-        attrs = cdf_manager.get_variable_attributes(f"hi_de_{var_name}").copy()
+        attrs = cdf_manager.get_variable_attributes(f"hi_de_{var_name}", False).copy()
         dtype = attrs.pop("dtype")
         dataset[var_name] = xr.DataArray(
             np.array(data, dtype=np.dtype(dtype)),

diff --git a/imap_processing/hi/l1b/hi_l1b.py b/imap_processing/hi/l1b/hi_l1b.py
@@ -106,7 +106,9 @@ def annotate_direct_events(l1a_dataset):
         "quality_flag",
         "nominal_bin",
     ]:
-        attrs = CDF_MANAGER.get_variable_attributes(f"hi_de_{var}").copy()
+        attrs = CDF_MANAGER.get_variable_attributes(
+            f"hi_de_{var}", check_schema=False
+        ).copy()
         dtype = attrs.pop("dtype")
         if attrs["FILLVAL"] == "NaN":
             attrs["FILLVAL"] = np.nan

diff --git a/imap_processing/hi/l1c/hi_l1c.py b/imap_processing/hi/l1c/hi_l1c.py
@@ -100,15 +100,19 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
     # preallocate coordinates xr.DataArrays
     coords = dict()
     # epoch coordinate has only 1 entry for pointing set
-    attrs = cdf_manager.get_variable_attributes("hi_pset_epoch").copy()
+    attrs = cdf_manager.get_variable_attributes(
+        "hi_pset_epoch", check_schema=False
+    ).copy()
     dtype = attrs.pop("dtype")
     coords["epoch"] = xr.DataArray(
         np.empty(1, dtype=dtype),
         name="epoch",
         dims=["epoch"],
         attrs=attrs,
     )
-    attrs = cdf_manager.get_variable_attributes("hi_pset_esa_step").copy()
+    attrs = cdf_manager.get_variable_attributes(
+        "hi_pset_esa_step", check_schema=False
+    ).copy()
     dtype = attrs.pop("dtype")
     coords["esa_step"] = xr.DataArray(
         np.full(n_esa_steps, attrs["FILLVAL"], dtype=dtype),
@@ -117,7 +121,9 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
         attrs=attrs,
     )
     # spin angle bins are 0.1 degree bins for full 360 degree spin
-    attrs = cdf_manager.get_variable_attributes("hi_pset_spin_angle_bin").copy()
+    attrs = cdf_manager.get_variable_attributes(
+        "hi_pset_spin_angle_bin", check_schema=False
+    ).copy()
     dtype = attrs.pop("dtype")
     coords["spin_angle_bin"] = xr.DataArray(
         np.arange(int(360 / 0.1), dtype=dtype),
@@ -142,7 +148,9 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
     ]:
         data_vars[var_name] = full_dataarray(
             var_name,
-            cdf_manager.get_variable_attributes(f"hi_pset_{var_name}"),
+            cdf_manager.get_variable_attributes(
+                f"hi_pset_{var_name}", check_schema=False
+            ),
             coords,
             shape=var_shapes.get(var_name, None),
         )
@@ -152,19 +160,25 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
         coords["esa_step"].values.astype(str),
         name="esa_step_label",
         dims=["esa_step"],
-        attrs=cdf_manager.get_variable_attributes("hi_pset_esa_step_label"),
+        attrs=cdf_manager.get_variable_attributes(
+            "hi_pset_esa_step_label", check_schema=False
+        ),
     )
     data_vars["spin_bin_label"] = xr.DataArray(
         coords["spin_angle_bin"].values.astype(str),
         name="spin_bin_label",
         dims=["spin_angle_bin"],
-        attrs=cdf_manager.get_variable_attributes("hi_pset_spin_bin_label"),
+        attrs=cdf_manager.get_variable_attributes(
+            "hi_pset_spin_bin_label", check_schema=False
+        ),
     )
     data_vars["label_vector_HAE"] = xr.DataArray(
         np.array(["x HAE", "y HAE", "z HAE"], dtype=str),
         name="label_vector_HAE",
         dims=[" "],
-        attrs=cdf_manager.get_variable_attributes("hi_pset_label_vector_HAE"),
+        attrs=cdf_manager.get_variable_attributes(
+            "hi_pset_label_vector_HAE", check_schema=False
+        ),
     )
 
     pset_global_attrs = cdf_manager.get_global_attributes(

diff --git a/.../tests/imap_default_global_cdf_attrs.yaml → ...ts/cdf/imap_default_global_cdf_attrs.yaml b/.../tests/imap_default_global_cdf_attrs.yaml → ...ts/cdf/imap_default_global_cdf_attrs.yaml
diff --git a/...ared/default_global_cdf_attrs_schema.yaml → ...ared/default_global_cdf_attrs_schema.yaml b/...ared/default_global_cdf_attrs_schema.yaml → ...ared/default_global_cdf_attrs_schema.yaml
diff --git a/...ed/default_variable_cdf_attrs_schema.yaml → ...ed/default_variable_cdf_attrs_schema.yaml b/...ed/default_variable_cdf_attrs_schema.yaml → ...ed/default_variable_cdf_attrs_schema.yaml
@@ -120,7 +120,7 @@ attribute_key:
       Explicitly ties a data variable to the time variable on which it depends. All variables
       which change with time must have a DEPEND_0 attribute defined. See section 5.2.1
       which specifies the HERMES usage of DEPEND_0.
-    required: true
+    required: false
     overwrite: false
     valid_values: null
     alternate: null