Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Failed extrapolate_water_elevation_to_dry_areas() when loading training dataset in chunks. #145

Open
FariborzDaneshvar-NOAA opened this issue Jul 10, 2024 · 2 comments
Assignees

Comments

@FariborzDaneshvar-NOAA
Copy link
Collaborator

If I use chunks='auto' for reading the subset file:

xarray.open_dataset(subset_filename, chunks='auto')

Then extrapolate_water_elevation_to_dry_areas() function that uses training set from the subset (training_set = subset.sel(run=training_perturbations['run'])) as an input (da=training_set) will fail with the following KeyError:

KeyError: 'Indexing with a boolean dask array is not allowed. This will result in a dask array of unknown shape. Such arrays are unsupported by Xarray. Please compute the indexer first using .compute()'

If I add .compute() to line 887 (as explained in this PR), then I will get this new NotImplementedError:

`NotImplementedError: xarray can't set arrays with multiple array indices to dask yet.`

Full error messages in the following comments.

@FariborzDaneshvar-NOAA
Copy link
Collaborator Author

First Error (KeyError):

KeyError                                  Traceback (most recent call last)
Cell In[44], line 13
     10     training_set_adjusted = training_set.copy(deep=True)
     11 else:
     12     # make an adjusted training set for dry areas..
---> 13     training_set_adjusted = extrapolate_water_elevation_to_dry_areas(
     14         da=training_set,
     15         k_neighbors=k_neighbors,
     16         idw_order=idw_order,
     17         compute_headloss=True,
     18         mann_coef=mann_coef,
     19         u_ref=0.4,
     20         d_ref=1,
     21     )

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/ensembleperturbation/parsing/adcirc.py:890, in extrapolate_water_elevation_to_dry_areas(da, k_neighbors, idw_order, compute_headloss, mann_coef, u_ref, d_ref, min_depth)
    888 tree = KDTree(projected_coordinates[~null])
    889 dd, nn = tree.query(projected_coordinates[null], k=k_neighbors)
--> 890 max_allowable_values = da['depth'][null].values + min_depth - numpy.finfo(float).eps
    891 if k_neighbors == 1:
    892     headloss = dd * friction_factor  # hydraulic friction loss

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataarray.py:823, in DataArray.__getitem__(self, key)
    820     return self._getitem_coord(key)
    821 else:
    822     # xarray-style array indexing
--> 823     return self.isel(indexers=self._item_key_to_dict(key))

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataarray.py:1413, in DataArray.isel(self, indexers, drop, missing_dims, **indexers_kwargs)
   1410 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
   1412 if any(is_fancy_indexer(idx) for idx in indexers.values()):
-> 1413     ds = self._to_temp_dataset()._isel_fancy(
   1414         indexers, drop=drop, missing_dims=missing_dims
   1415     )
   1416     return self._from_temp_dataset(ds)
   1418 # Much faster algorithm for when all indexers are ints, slices, one-dimensional
   1419 # lists, or zero or one-dimensional np.ndarray's

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataset.py:2704, in Dataset._isel_fancy(self, indexers, drop, missing_dims)
   2700 var_indexers = {
   2701     k: v for k, v in valid_indexers.items() if k in var.dims
   2702 }
   2703 if var_indexers:
-> 2704     new_var = var.isel(indexers=var_indexers)
   2705     # drop scalar coordinates
   2706     # https://github.com/pydata/xarray/issues/6554
   2707     if name in self.coords and drop and new_var.ndim == 0:

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:1378, in Variable.isel(self, indexers, missing_dims, **indexers_kwargs)
   1375 indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
   1377 key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)
-> 1378 return self[key]

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:899, in Variable.__getitem__(self, key)
    886 def __getitem__(self: T_Variable, key) -> T_Variable:
    887     """Return a new Variable object whose contents are consistent with
    888     getting the provided key from the underlying data.
    889 
   (...)
    897     array `x.values` directly.
    898     """
--> 899     dims, indexer, new_order = self._broadcast_indexes(key)
    900     data = as_indexable(self._data)[indexer]
    901     if new_order:

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:732, in Variable._broadcast_indexes(self, key)
    729 if all(isinstance(k, BASIC_INDEXING_TYPES) for k in key):
    730     return self._broadcast_indexes_basic(key)
--> 732 self._validate_indexers(key)
    733 # Detect it can be mapped as an outer indexer
    734 # If all key is unlabeled, or
    735 # key can be mapped as an OuterIndexer.
    736 if all(not isinstance(k, Variable) for k in key):

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:784, in Variable._validate_indexers(self, key)
    779     raise IndexError(
    780         "{}-dimensional boolean indexing is "
    781         "not supported. ".format(k.ndim)
    782     )
    783 if is_duck_dask_array(k.data):
--> 784     raise KeyError(
    785         "Indexing with a boolean dask array is not allowed. "
    786         "This will result in a dask array of unknown shape. "
    787         "Such arrays are unsupported by Xarray."
    788         "Please compute the indexer first using .compute()"
    789     )
    790 if getattr(k, "dims", (dim,)) != (dim,):
    791     raise IndexError(
    792         "Boolean indexer should be unlabeled or on the "
    793         "same dimension to the indexed array. Indexer is "
   (...)
    796         )
    797     )

KeyError: 'Indexing with a boolean dask array is not allowed. This will result in a dask array of unknown shape. Such arrays are unsupported by Xarray.Please compute the indexer first using .compute()'

@FariborzDaneshvar-NOAA
Copy link
Collaborator Author

Second Error (NotImplementedError):

NotImplementedError                       Traceback (most recent call last)
Cell In[21], line 13
     10     training_set_adjusted = training_set.copy(deep=True)
     11 else:
     12     # make an adjusted training set for dry areas..
---> 13     training_set_adjusted = extrapolate_water_elevation_to_dry_areas(
     14         da=training_set,
     15         k_neighbors=k_neighbors,
     16         idw_order=idw_order,
     17         compute_headloss=True,
     18         mann_coef=mann_coef,
     19         u_ref=0.4,
     20         d_ref=1,
     21     )

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/ensembleperturbation/parsing/adcirc.py:893, in extrapolate_water_elevation_to_dry_areas(da, k_neighbors, idw_order, compute_headloss, mann_coef, u_ref, d_ref, min_depth)
    891 if k_neighbors == 1:
    892     headloss = dd * friction_factor  # hydraulic friction loss
--> 893     da_adjusted[run, null] = numpy.fmin(
    894         da[run, nodes[~null][nn]].values - headloss, max_allowable_values
    895     )
    896 else:
    897     for kk in range(k_neighbors):

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataarray.py:840, in DataArray.__setitem__(self, key, value)
    835 # DataArray key -> Variable key
    836 key = {
    837     k: v.variable if isinstance(v, DataArray) else v
    838     for k, v in self._item_key_to_dict(key).items()
    839 }
--> 840 self.variable[key] = value

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:977, in Variable.__setitem__(self, key, value)
    974     value = np.moveaxis(value, new_order, range(len(new_order)))
    976 indexable = as_indexable(self._data)
--> 977 indexable[index_tuple] = value

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/indexing.py:1460, in DaskIndexingAdapter.__setitem__(self, key, value)
   1458 num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in key.tuple)
   1459 if num_non_slices > 1:
-> 1460     raise NotImplementedError(
   1461         "xarray can't set arrays with multiple "
   1462         "array indices to dask yet."
   1463     )
   1464 self.array[key.tuple] = value

NotImplementedError: xarray can't set arrays with multiple array indices to dask yet.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant