Feature/empty dataset (#283)
* update get time functions

* update to fix pylint flake8

* update project dependencies

* fix tests

* update libraries for snyk

* poetry update

* update pytoml

* update test retry time

* add harmony deployment into github actions

* update harmony deployment script

* update pytoml version to develop

* update tests delays

* fix harmony deploy url

* removed debug print statement

* update copy of empty data, and check bounds for empty datasets

* update tests for empty datasets

* update to fix temporal subsetting

* fix ghrsst dataset temporal subsetting

* add try except for getting long name

* optimize mask and scale code

* poetry update

* update changelog
sliu008 authored Jul 29, 2024
1 parent 3956b9c commit f418d5a
Showing 5 changed files with 16 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [issue/267](https://github.com/podaac/l2ss-py/pull/261): Add xtrack and atrack dimension options for get_nd_indexers when bounding box subsetting is performed on SNDR.
- Fix temporal subsetting ghrsst dataset by adding time delta to time variable.
- Add a function to test ghrsst dataset ability to access variables when mask_and_scale is true.
- Update l2ss-py to allow the return of empty granules.
### Changed
### Deprecated
### Removed
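
The empty-granule entry above is the heart of this commit. Below is a hypothetical usage sketch, not code from this commit: it assumes the `subset(file_to_subset, bbox, output_file, ...)` signature visible in the subset.py diff further down, placeholder file names, and a bbox format of `[[lon_min, lon_max], [lat_min, lat_max]]`; the exact return value may differ by version.

```python
import numpy as np
from podaac.subsetter import subset

# Hypothetical sketch of the new behavior: a bounding box that matches no data
# should still yield a valid output granule whose variables are filled with each
# variable's _FillValue (or NaN when none is defined) rather than the call failing.
# File names and bbox values are placeholders.
spatial_bounds = subset.subset(
    file_to_subset="input_granule.nc",
    bbox=np.array([[-10.0, -9.0], [-10.0, -9.0]]),  # lon/lat box intersecting nothing
    output_file="empty_subset.nc",
)

# An all-NaN spatial bounds result signals an empty granule, which the Harmony
# adapter change below now checks before attaching a bbox/geometry to the STAC record.
print(spatial_bounds)
```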
2 changes: 0 additions & 2 deletions podaac/subsetter/subset.py
@@ -1099,7 +1099,6 @@ def open_as_nc_dataset(filepath: str) -> Tuple[nc.Dataset, bool]:
try:
nc_dataset = nc.Dataset(filepath, mode='r')
has_groups = bool(nc_dataset.groups)

# If dataset has groups, transform to work with xarray
if has_groups:
nc_dataset = transform_grouped_dataset(nc_dataset, filepath)
@@ -1249,7 +1248,6 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
if 'ScanTime' in [var.split('__')[-2] for var in list(nc_dataset.variables.keys())]:
gc.change_var_dims(nc_dataset, variables)
hdf_type = 'GPM'

args = {
'decode_coords': False,
'mask_and_scale': False,
6 changes: 4 additions & 2 deletions podaac/subsetter/subset_harmony.py
@@ -199,8 +199,10 @@ def filter_by_subtype(variables, subtype):
if result_bbox is not None:
if message.subset:
message.subset.process('bbox')
result.bbox = podaac_to_harmony_bbox(result_bbox)
result.geometry = bbox_to_geometry(result.bbox)
bounding_box_array = np.array(podaac_to_harmony_bbox(result_bbox))
if not np.all(np.isnan(bounding_box_array)):
result.bbox = podaac_to_harmony_bbox(result_bbox)
result.geometry = bbox_to_geometry(result.bbox)

# Return the STAC record
return result
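
To make the intent of the added check concrete, here is a minimal standalone illustration (not from the commit) of the guard it introduces, using placeholder values in place of what `podaac_to_harmony_bbox` would return for an empty subset:

```python
import numpy as np

# When the subsetter produces an empty granule, the computed bounding box comes
# back as all NaNs. The new guard skips attaching bbox/geometry in that case so
# the STAC record is not populated with NaN coordinates. Values are illustrative.
empty_bbox = (np.nan, np.nan, np.nan, np.nan)
bounding_box_array = np.array(empty_bbox)

if not np.all(np.isnan(bounding_box_array)):
    print("non-empty result: attach bbox and geometry to the STAC item")
else:
    print("empty granule: leave result.bbox / result.geometry unset")  # runs here
```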
4 changes: 2 additions & 2 deletions podaac/subsetter/xarray_enhancements.py
@@ -145,8 +145,8 @@ def copy_empty_dataset(dataset: xr.Dataset) -> xr.Dataset:
# Create a dict object where each key is a variable in the dataset and the value is an
# array initialized to the fill value for that variable or NaN if there is no fill value
# attribute for the variable
empty_data = {k: np.full(v.shape, dataset.variables[k].attrs.get('_FillValue', np.nan)) for k, v in
dataset.items()}

empty_data = {k: np.full(v.shape, dataset.variables[k].attrs.get('_FillValue', np.nan), dtype=v.dtype) for k, v in dataset.items()}

# Create a copy of the dataset filled with the empty data. Then select the first index along each
# dimension and return the result
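
The change above adds `dtype=v.dtype` so each filled array keeps its variable's original dtype instead of being promoted to float64. Below is a small self-contained sketch of the same pattern on a toy dataset; variable names and values are made up, and the real function may differ in detail.

```python
import numpy as np
import xarray as xr

# Toy dataset standing in for a real granule; names and values are illustrative.
ds = xr.Dataset(
    {"sst": (("lat", "lon"), np.random.rand(3, 4).astype(np.float32),
             {"_FillValue": -9999.0})}
)

# Fill each variable with its _FillValue (NaN if absent), preserving dtype, then
# keep only the first index along each dimension so the copy stays minimal.
empty_data = {
    k: np.full(v.shape, ds.variables[k].attrs.get("_FillValue", np.nan), dtype=v.dtype)
    for k, v in ds.items()
}
empty_ds = ds.copy(data=empty_data).isel({dim: slice(0, 1) for dim in ds.dims})

print(empty_ds["sst"].dtype)   # float32 is preserved
print(empty_ds["sst"].values)  # [[-9999.]]
```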
10 changes: 9 additions & 1 deletion tests/test_subset.py
@@ -350,7 +350,15 @@ def test_subset_empty_bbox(test_file, data_dir, subset_output_dir, request):

# Ensure all variables are present but empty.
for _, variable in empty_dataset.data_vars.items():
assert np.all(variable.data == variable.attrs.get('_FillValue', np.nan) or np.isnan(variable.data))
fill_value = variable.attrs.get('_FillValue', np.nan)
data = variable.data

# Perform the main check
condition = np.all(data == fill_value) or np.all(np.isnan(data))

# Handle the specific integer dtype case
if not condition and not (np.isnan(fill_value) and np.issubdtype(variable.dtype, np.integer)):
assert condition, f"Data does not match fill value for variable: {variable}"

assert test_input_dataset.dims.keys() == empty_dataset.dims.keys()
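
The extra branch in the test exists because integer-typed variables cannot represent NaN. A short illustration with made-up values of why neither check can succeed for an integer variable whose default fill is NaN, which is why the assertion is skipped in that case:

```python
import numpy as np

# Integer arrays cannot hold NaN, so when a variable has no _FillValue and the
# "empty" fill defaults to NaN, neither comparison below can ever be True.
int_data = np.zeros(4, dtype=np.int32)   # stands in for an "emptied" integer variable
fill_value = np.nan                      # default when _FillValue is absent

print(np.all(int_data == fill_value))    # False: NaN never compares equal to anything
print(np.all(np.isnan(int_data)))        # False: integer values are never NaN
```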

