Feature/empty dataset (#283)
* update get time functions

* update to fix pylint flake8

* update project dependencies

* fix tests

* update libraries for snyk

* poetry update

* update pytoml

* update test retry time

* add harmony deployment into github actions

* update harmony deployment script

* update pytoml version to develop

* update tests delays

* fix harmony deploy url

* removed debug print statement

* update copy of empty data, and check bounds for empty datasets

* update tests for empty datasets

* update to fix temporal subsetting

* fix ghrsst dataset temporal subsetting

* add try except for getting long name

* optimize mask and scale code

* poetry update

* update changelog
sliu008 authored Jul 29, 2024
1 parent 3956b9c commit f418d5a
Showing 5 changed files with 16 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [issue/267](https://github.com/podaac/l2ss-py/pull/261): Add xtrack and atrack dimension options for get_nd_indexers when bounding box subsetting is performed on SNDR.
- Fix temporal subsetting ghrsst dataset by adding time delta to time variable.
- Add a function to test ghrsst dataset ability to access variables when mask_and_scale is true.
- Update l2ss-py to allow the return of empty granules.
### Changed
### Deprecated
### Removed
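
The empty-granule entry above is the heart of this commit. Below is a hypothetical usage sketch, not code from this commit: it assumes the `subset(file_to_subset, bbox, output_file, ...)` signature visible in the subset.py diff further down, placeholder file names, and a bbox format of `[[lon_min, lon_max], [lat_min, lat_max]]`; the exact return value may differ by version.

```python
import numpy as np
from podaac.subsetter import subset

# Hypothetical sketch of the new behavior: a bounding box that matches no data
# should still yield a valid output granule whose variables are filled with each
# variable's _FillValue (or NaN when none is defined) rather than the call failing.
# File names and bbox values are placeholders.
spatial_bounds = subset.subset(
    file_to_subset="input_granule.nc",
    bbox=np.array([[-10.0, -9.0], [-10.0, -9.0]]),  # lon/lat box intersecting nothing
    output_file="empty_subset.nc",
)

# An all-NaN spatial bounds result signals an empty granule, which the Harmony
# adapter change below now checks before attaching a bbox/geometry to the STAC record.
print(spatial_bounds)
```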
2 changes: 0 additions & 2 deletions podaac/subsetter/subset.py
@@ -1099,7 +1099,6 @@ def open_as_nc_dataset(filepath: str) -> Tuple[nc.Dataset, bool]:
try:
nc_dataset = nc.Dataset(filepath, mode='r')
has_groups = bool(nc_dataset.groups)

# If dataset has groups, transform to work with xarray
if has_groups:
nc_dataset = transform_grouped_dataset(nc_dataset, filepath)
@@ -1249,7 +1248,6 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
if 'ScanTime' in [var.split('__')[-2] for var in list(nc_dataset.variables.keys())]:
gc.change_var_dims(nc_dataset, variables)
hdf_type = 'GPM'

args = {
'decode_coords': False,
'mask_and_scale': False,
6 changes: 4 additions & 2 deletions podaac/subsetter/subset_harmony.py
@@ -199,8 +199,10 @@ def filter_by_subtype(variables, subtype):
if result_bbox is not None:
if message.subset:
message.subset.process('bbox')
result.bbox = podaac_to_harmony_bbox(result_bbox)
result.geometry = bbox_to_geometry(result.bbox)
bounding_box_array = np.array(podaac_to_harmony_bbox(result_bbox))
if not np.all(np.isnan(bounding_box_array)):
result.bbox = podaac_to_harmony_bbox(result_bbox)
result.geometry = bbox_to_geometry(result.bbox)

# Return the STAC record
return result
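
To make the intent of the added check concrete, here is a minimal standalone illustration (not from the commit) of the guard it introduces, using placeholder values in place of what `podaac_to_harmony_bbox` would return for an empty subset:

```python
import numpy as np

# When the subsetter produces an empty granule, the computed bounding box comes
# back as all NaNs. The new guard skips attaching bbox/geometry in that case so
# the STAC record is not populated with NaN coordinates. Values are illustrative.
empty_bbox = (np.nan, np.nan, np.nan, np.nan)
bounding_box_array = np.array(empty_bbox)

if not np.all(np.isnan(bounding_box_array)):
    print("non-empty result: attach bbox and geometry to the STAC item")
else:
    print("empty granule: leave result.bbox / result.geometry unset")  # runs here
```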
4 changes: 2 additions & 2 deletions podaac/subsetter/xarray_enhancements.py
@@ -145,8 +145,8 @@ def copy_empty_dataset(dataset: xr.Dataset) -> xr.Dataset:
# Create a dict object where each key is a variable in the dataset and the value is an
# array initialized to the fill value for that variable or NaN if there is no fill value
# attribute for the variable
empty_data = {k: np.full(v.shape, dataset.variables[k].attrs.get('_FillValue', np.nan)) for k, v in
dataset.items()}

empty_data = {k: np.full(v.shape, dataset.variables[k].attrs.get('_FillValue', np.nan), dtype=v.dtype) for k, v in dataset.items()}

# Create a copy of the dataset filled with the empty data. Then select the first index along each
# dimension and return the result
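
The change above adds `dtype=v.dtype` so each filled array keeps its variable's original dtype instead of being promoted to float64. Below is a small self-contained sketch of the same pattern on a toy dataset; variable names and values are made up, and the real function may differ in detail.

```python
import numpy as np
import xarray as xr

# Toy dataset standing in for a real granule; names and values are illustrative.
ds = xr.Dataset(
    {"sst": (("lat", "lon"), np.random.rand(3, 4).astype(np.float32),
             {"_FillValue": -9999.0})}
)

# Fill each variable with its _FillValue (NaN if absent), preserving dtype, then
# keep only the first index along each dimension so the copy stays minimal.
empty_data = {
    k: np.full(v.shape, ds.variables[k].attrs.get("_FillValue", np.nan), dtype=v.dtype)
    for k, v in ds.items()
}
empty_ds = ds.copy(data=empty_data).isel({dim: slice(0, 1) for dim in ds.dims})

print(empty_ds["sst"].dtype)   # float32 is preserved
print(empty_ds["sst"].values)  # [[-9999.]]
```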
10 changes: 9 additions & 1 deletion tests/test_subset.py
@@ -350,7 +350,15 @@ def test_subset_empty_bbox(test_file, data_dir, subset_output_dir, request):

# Ensure all variables are present but empty.
for _, variable in empty_dataset.data_vars.items():
assert np.all(variable.data == variable.attrs.get('_FillValue', np.nan) or np.isnan(variable.data))
fill_value = variable.attrs.get('_FillValue', np.nan)
data = variable.data

# Perform the main check
condition = np.all(data == fill_value) or np.all(np.isnan(data))

# Handle the specific integer dtype case
if not condition and not (np.isnan(fill_value) and np.issubdtype(variable.dtype, np.integer)):
assert condition, f"Data does not match fill value for variable: {variable}"

assert test_input_dataset.dims.keys() == empty_dataset.dims.keys()
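
The extra branch in the test exists because integer-typed variables cannot represent NaN. A short illustration with made-up values of why neither check can succeed for an integer variable whose default fill is NaN, which is why the assertion is skipped in that case:

```python
import numpy as np

# Integer arrays cannot hold NaN, so when a variable has no _FillValue and the
# "empty" fill defaults to NaN, neither comparison below can ever be True.
int_data = np.zeros(4, dtype=np.int32)   # stands in for an "emptied" integer variable
fill_value = np.nan                      # default when _FillValue is absent

print(np.all(int_data == fill_value))    # False: NaN never compares equal to anything
print(np.all(np.isnan(int_data)))        # False: integer values are never NaN
```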

