Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CDF Updates #660

Merged
merged 65 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 64 commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
272ba6f
Writing initial TODO comments.
anamanica Jun 13, 2024
537b341
Troubleshooting
anamanica Jun 13, 2024
526d0d4
Schema tests passed
anamanica Jun 14, 2024
2407f18
I think I understand instrument_id
anamanica Jun 14, 2024
32c568d
Fixing test file
anamanica Jun 17, 2024
3f19842
Var attribute tests
anamanica Jun 17, 2024
42face3
First draft
anamanica Jun 17, 2024
e08ea48
Merge branch 'IMAP-Science-Operations-Center:dev' into cdf-attribute-…
anamanica Jun 17, 2024
649d2da
Deleting file
anamanica Jun 17, 2024
d0c3be2
Test
anamanica Jun 17, 2024
2033c8b
Fixing pulled errors
anamanica Jun 17, 2024
ac9d8d9
Merge branch 'IMAP-Science-Operations-Center:dev' into cdf-attribute-…
anamanica Jun 18, 2024
c1f60ff
Quick
anamanica Jun 17, 2024
308d09a
Trying different things to pass pre-checks
anamanica Jun 18, 2024
b964266
Testing
anamanica Jun 18, 2024
191c234
TEST
anamanica Jun 18, 2024
a023663
Fixing failed pre-check
anamanica Jun 18, 2024
4719b28
Test2
anamanica Jun 18, 2024
7fcb53e
Fixing pre-commit issues
anamanica Jun 18, 2024
97db92b
Fixing prechecks
anamanica Jun 18, 2024
5e6c8e5
Removing test statements
anamanica Jun 18, 2024
c1b6a55
Moving files
anamanica Jun 18, 2024
706e981
Fixing PR draft comments
anamanica Jun 18, 2024
a8ec74c
Adding additional tests
anamanica Jun 18, 2024
8b99182
Breaking up test functions
anamanica Jun 18, 2024
f2f45c1
Adding more tests
anamanica Jun 18, 2024
46eb203
Getting closer to final draft
anamanica Jun 18, 2024
7032161
Finishing touches
anamanica Jun 18, 2024
8fa0535
Merge branch 'IMAP-Science-Operations-Center:dev' into cdf-attribute-…
anamanica Jun 19, 2024
7906ed6
Codcov check
anamanica Jun 20, 2024
d2fea9a
Codcov check
anamanica Jun 20, 2024
4620de5
Else Testing
anamanica Jun 20, 2024
7af1e5a
Hopefully done
anamanica Jun 20, 2024
b5baf40
Fixing PR comments
anamanica Jun 20, 2024
20d5ec2
More work done
anamanica Jun 21, 2024
bf05f47
Cleaning up
anamanica Jun 21, 2024
12f371c
Figured out Path issue
anamanica Jun 21, 2024
456c465
Final touches (again)
anamanica Jun 21, 2024
cfcd446
Merge branch 'IMAP-Science-Operations-Center:dev' into cdf-attribute-…
anamanica Jun 21, 2024
16dbbe2
Comment mods
anamanica Jun 24, 2024
04b78d9
Pre-commit check
anamanica Jun 24, 2024
9b0a472
Pre-commit numpydoc
anamanica Jun 24, 2024
229f75c
Pre
anamanica Jun 24, 2024
951d0dc
Final Final Commit
anamanica Jun 24, 2024
39bcb67
Finished get_variable_attributes
anamanica Jun 25, 2024
fa7f91c
Changed global_attributes, and variable_attributes to private methods.
anamanica Jun 25, 2024
cddd552
Fixing usage of .variable_attributes, and .global_attributes
anamanica Jun 25, 2024
cdb2d27
Fixing pre-commits
anamanica Jun 25, 2024
9d9cada
Reverting changes
anamanica Jun 25, 2024
7a3ac5e
Looking for failing tests in pre-commits
anamanica Jun 25, 2024
7fd4225
Merge branch 'dev' of github.com:IMAP-Science-Operations-Center/imap_…
anamanica Jun 25, 2024
0a9395a
Push before Pull
anamanica Jun 25, 2024
0297af0
Resolving conflicts
anamanica Jun 25, 2024
7c6320a
Test change
anamanica Jun 25, 2024
8138cfb
Fixing test errors
anamanica Jun 25, 2024
e5e5b05
Updating code, and marking test as fail.
anamanica Jun 26, 2024
d904c81
Stupid solution
anamanica Jun 26, 2024
1d4c487
Added DEPEND_i case
anamanica Jun 26, 2024
f288f6a
Added check_schema tag so hi tests pass
anamanica Jun 26, 2024
0099a81
Excluding DEPEND_0 as required just to get tests to pass.
anamanica Jun 26, 2024
87c64a3
Cleaning up my code
anamanica Jun 27, 2024
69dff37
Changing file locations
anamanica Jun 27, 2024
4875d20
Cleaning up code, changing attriburte schema required, adding _i cases.
anamanica Jun 27, 2024
a02fa7e
Merge branch 'dev' of github.com:IMAP-Science-Operations-Center/imap_…
anamanica Jun 27, 2024
b32f77b
PR comment changes
anamanica Jun 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 86 additions & 17 deletions imap_processing/cdf/cdf_attribute_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from __future__ import annotations

import logging
from pathlib import Path

import yaml
Expand Down Expand Up @@ -75,10 +76,10 @@ def __init__(self, data_dir: Path):
self.variable_attribute_schema = self._load_default_variable_attr_schema()

# Load Default IMAP Global Attributes
self.global_attributes = CdfAttributeManager._load_yaml_data(
self._global_attributes = CdfAttributeManager._load_yaml_data(
self.source_dir / DEFAULT_GLOBAL_CDF_ATTRS_FILE
)
self.variable_attributes = dict()
self._variable_attributes = dict()

def _load_default_global_attr_schema(self) -> dict:
"""
Expand Down Expand Up @@ -123,7 +124,7 @@ def load_global_attributes(self, file_path: str):
file_path : str
File path to load, under self.source_dir.
"""
self.global_attributes.update(
self._global_attributes.update(
CdfAttributeManager._load_yaml_data(self.source_dir / file_path)
)

Expand All @@ -145,7 +146,7 @@ def add_global_attribute(self, attribute_name: str, attribute_value: str) -> Non
attribute_value : str
The value of the attribute to add.
"""
self.global_attributes[attribute_name] = attribute_value
self._global_attributes[attribute_name] = attribute_value

@staticmethod
def _load_yaml_data(file_path: str | Path) -> dict:
Expand Down Expand Up @@ -191,15 +192,15 @@ def get_global_attributes(self, instrument_id: str | None = None) -> dict:
"""
output = dict()
for attr_name, attr_schema in self.global_attribute_schema.items():
if attr_name in self.global_attributes:
output[attr_name] = self.global_attributes[attr_name]
if attr_name in self._global_attributes:
output[attr_name] = self._global_attributes[attr_name]
# Retrieve instrument specific global attributes from the variable file
elif (
instrument_id is not None
and attr_name in self.global_attributes[instrument_id]
and attr_name in self._global_attributes[instrument_id]
):
output[attr_name] = self.global_attributes[instrument_id][attr_name]
elif attr_schema["required"] and attr_name not in self.global_attributes:
output[attr_name] = self._global_attributes[instrument_id][attr_name]
elif attr_schema["required"] and attr_name not in self._global_attributes:
# TODO throw an error
output[attr_name] = None

Expand All @@ -219,9 +220,9 @@ def load_variable_attributes(self, file_name: str) -> None:
raw_var_attrs = CdfAttributeManager._load_yaml_data(self.source_dir / file_name)
var_attrs = raw_var_attrs.copy()

self.variable_attributes.update(var_attrs)
self._variable_attributes.update(var_attrs)

def get_variable_attributes(self, variable_name: str) -> dict:
def get_variable_attributes(self, variable_name: str, check_schema=True) -> dict:
"""
Get the attributes for a given variable name.

Expand All @@ -233,13 +234,81 @@ def get_variable_attributes(self, variable_name: str) -> dict:
variable_name : str
The name of the variable to retrieve attributes for.

check_schema : bool
Flag to bypass schema validation.

Returns
-------
dict
I have no idea todo check.
Information containing specific variable attributes
associated with "variable_name".
"""
# TODO: Create a variable attribute schema file, validate here
if variable_name in self.variable_attributes:
return self.variable_attributes[variable_name]
# TODO: throw an error?
return {}
# Case to handle attributes not in schema
if check_schema is False:
if variable_name in self._variable_attributes:
return self._variable_attributes[variable_name]
# TODO: throw an error?
return {}

output = dict()
for attr_name in self.variable_attribute_schema["attribute_key"]:
# Standard case
if attr_name in self._variable_attributes[variable_name]:
output[attr_name] = self._variable_attributes[variable_name][attr_name]
# Case to handle DEPEND_i schema issues
elif attr_name == "DEPEND_i":
# DEPEND_0 is not required, UNLESS we are dealing with
# variable_name = epoch
# Find all the attributes of variable_name that contain "DEPEND"
variable_depend_attrs = [
key
for key in self._variable_attributes[variable_name].keys()
if "DEPEND" in key
]
# Confirm that each DEPEND_i attribute is unique
if len(set(variable_depend_attrs)) != len(variable_depend_attrs):
logging.warning(
f"Found duplicate DEPEND_i attribute in variable "
f"{variable_name}: {variable_depend_attrs}"
)
for variable_depend_attr in variable_depend_attrs:
output[variable_depend_attr] = self._variable_attributes[
variable_name
][variable_depend_attr]
# TODO: Add more DEPEND_0 variable checks!
# Case to handle LABL_PTR_i schema issues
elif attr_name == "LABL_PTR_i":
# Find all the attributes of variable_name that contain "LABL_PTR"
variable_labl_attrs = [
key
for key in self._variable_attributes[variable_name].keys()
if "LABL_PTR" in key
]
for variable_labl_attr in variable_labl_attrs:
output[variable_labl_attr] = self._variable_attributes[
variable_name
][variable_labl_attr]
# Case to handle REPRESENTATION_i schema issues
elif attr_name == "REPRESENTATION_i":
# Find all the attributes of variable_name that contain
# "REPRESENTATION"
variable_rep_attrs = [
key
for key in self._variable_attributes[variable_name].keys()
if "REPRESENTATION" in key
]
for variable_rep_attr in variable_rep_attrs:
output[variable_rep_attr] = self._variable_attributes[
variable_name
][variable_rep_attr]
# Validating required schema
elif (
self.variable_attribute_schema["attribute_key"][attr_name]["required"]
and attr_name not in self._variable_attributes[variable_name]
):
logging.warning(
"Required schema '" + attr_name + "' attribute not present"
)
output[attr_name] = ""

return output
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ attribute_key:
Explicitly ties a data variable to the time variable on which it depends. All variables
which change with time must have a DEPEND_0 attribute defined. See section 5.2.1
which specifies the HERMES usage of DEPEND_0.
required: true
required: false
overwrite: false
valid_values: null
alternate: null
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commenting on the __init__.py file but this is relevant to all files in this directory...

All .py files in this directory should be moved to imap_processing/tests/cdf/
All .yaml files should be moved under the parent directory imap_processing/tests/cdf/test_data

Basically, we want the directory structure under imap_processing/tests to mirror the directory structure under imap_processing, with the exception of test data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sweet! I had to leave one .yaml file under /tests/cdf due to the way the code is written in __init__, but I will be sure to ask Maxine about that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason we left the tests where they were is that this code is going to be moved to another repo very soon and hopefully removed from here. So, I wanted it to be isolated from everything else.

In general, though, all tests should go under imap_processing/tests. I don't feel strongly about this, but I'd slightly prefer to keep it all together to make it easier to move and to feel confident I got all of it.

Empty file.
2 changes: 1 addition & 1 deletion imap_processing/codice/codice_l1b.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def create_hskp_dataset(l1a_dataset, cdf_attrs) -> xr.Dataset:
# catdesc = value.short_description
# I am holding off making this change until I acquire updated housekeeping
# packets/validation data that match the latest telemetry definitions
attrs = cdf_attrs.variable_attributes["codice_support_attrs"]
attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
attrs["CATDESC"] = "TBD"
attrs["DEPEND_0"] = "epoch"
attrs["FIELDNAM"] = "TBD"
Expand Down
2 changes: 1 addition & 1 deletion imap_processing/codice/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def create_hskp_dataset(packets, data_version: str) -> xr.Dataset:
# packets/validation data that match the latest telemetry definitions
# I may also be able to replace this function with utils.create_dataset(?)
for key, value in metadata_arrays.items():
attrs = cdf_attrs.variable_attributes["codice_support_attrs"]
attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
attrs["CATDESC"] = "TBD"
attrs["DEPEND_0"] = "epoch"
attrs["FIELDNAM"] = "TBD"
Expand Down
2 changes: 1 addition & 1 deletion imap_processing/hi/l1a/science_direct_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def create_dataset(de_data_list: list, packet_met_time: list) -> xr.Dataset:
)

for var_name, data in data_dict.items():
attrs = cdf_manager.get_variable_attributes(f"hi_de_{var_name}").copy()
attrs = cdf_manager.get_variable_attributes(f"hi_de_{var_name}", False).copy()
dtype = attrs.pop("dtype")
dataset[var_name] = xr.DataArray(
np.array(data, dtype=np.dtype(dtype)),
Expand Down
4 changes: 3 additions & 1 deletion imap_processing/hi/l1b/hi_l1b.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def annotate_direct_events(l1a_dataset):
"quality_flag",
"nominal_bin",
]:
attrs = CDF_MANAGER.get_variable_attributes(f"hi_de_{var}").copy()
attrs = CDF_MANAGER.get_variable_attributes(
f"hi_de_{var}", check_schema=False
).copy()
dtype = attrs.pop("dtype")
if attrs["FILLVAL"] == "NaN":
attrs["FILLVAL"] = np.nan
Expand Down
28 changes: 21 additions & 7 deletions imap_processing/hi/l1c/hi_l1c.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,19 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
# preallocate coordinates xr.DataArrays
coords = dict()
# epoch coordinate has only 1 entry for pointing set
attrs = cdf_manager.get_variable_attributes("hi_pset_epoch").copy()
attrs = cdf_manager.get_variable_attributes(
"hi_pset_epoch", check_schema=False
).copy()
dtype = attrs.pop("dtype")
coords["epoch"] = xr.DataArray(
np.empty(1, dtype=dtype),
name="epoch",
dims=["epoch"],
attrs=attrs,
)
attrs = cdf_manager.get_variable_attributes("hi_pset_esa_step").copy()
attrs = cdf_manager.get_variable_attributes(
"hi_pset_esa_step", check_schema=False
).copy()
dtype = attrs.pop("dtype")
coords["esa_step"] = xr.DataArray(
np.full(n_esa_steps, attrs["FILLVAL"], dtype=dtype),
Expand All @@ -117,7 +121,9 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
attrs=attrs,
)
# spin angle bins are 0.1 degree bins for full 360 degree spin
attrs = cdf_manager.get_variable_attributes("hi_pset_spin_angle_bin").copy()
attrs = cdf_manager.get_variable_attributes(
"hi_pset_spin_angle_bin", check_schema=False
).copy()
dtype = attrs.pop("dtype")
coords["spin_angle_bin"] = xr.DataArray(
np.arange(int(360 / 0.1), dtype=dtype),
Expand All @@ -142,7 +148,9 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
]:
data_vars[var_name] = full_dataarray(
var_name,
cdf_manager.get_variable_attributes(f"hi_pset_{var_name}"),
cdf_manager.get_variable_attributes(
f"hi_pset_{var_name}", check_schema=False
),
coords,
shape=var_shapes.get(var_name, None),
)
Expand All @@ -152,19 +160,25 @@ def allocate_pset_dataset(n_esa_steps: int, sensor_str: str):
coords["esa_step"].values.astype(str),
name="esa_step_label",
dims=["esa_step"],
attrs=cdf_manager.get_variable_attributes("hi_pset_esa_step_label"),
attrs=cdf_manager.get_variable_attributes(
"hi_pset_esa_step_label", check_schema=False
),
)
data_vars["spin_bin_label"] = xr.DataArray(
coords["spin_angle_bin"].values.astype(str),
name="spin_bin_label",
dims=["spin_angle_bin"],
attrs=cdf_manager.get_variable_attributes("hi_pset_spin_bin_label"),
attrs=cdf_manager.get_variable_attributes(
"hi_pset_spin_bin_label", check_schema=False
),
)
data_vars["label_vector_HAE"] = xr.DataArray(
np.array(["x HAE", "y HAE", "z HAE"], dtype=str),
name="label_vector_HAE",
dims=[" "],
attrs=cdf_manager.get_variable_attributes("hi_pset_label_vector_HAE"),
attrs=cdf_manager.get_variable_attributes(
"hi_pset_label_vector_HAE", check_schema=False
),
)

pset_global_attrs = cdf_manager.get_global_attributes(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ attribute_key:
Explicitly ties a data variable to the time variable on which it depends. All variables
which change with time must have a DEPEND_0 attribute defined. See section 5.2.1
which specifies the HERMES usage of DEPEND_0.
required: true
required: false
overwrite: false
valid_values: null
alternate: null
Expand Down
Loading
Loading