From 5a4034b9174f8d8e5ed0cb5327fca109fcdfba58 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 18 Sep 2019 11:10:54 -0700 Subject: [PATCH 1/2] Fix isel performance regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xref GH2227 Before: indexing.BooleanIndexing.time_indexing 898±0ms After: indexing.BooleanIndexing.time_indexing 401±0ms --- asv_bench/benchmarks/indexing.py | 13 +++++++++++++ xarray/core/dataset.py | 12 ++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index cd212895d99..c4cfbbbdfdf 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -125,3 +125,16 @@ def setup(self, key): requires_dask() super().setup(key) self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) + + +class BooleanIndexing: + # https://github.com/pydata/xarray/issues/2227 + def setup(self): + self.ds = xr.Dataset( + {"a": ("time", np.arange(10_000_000))}, + coords={"time": np.arange(10_000_000)}, + ) + self.time_filter = self.ds.time > 50_000 + + def time_indexing(self): + self.ds.isel(time=self.time_filter) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 693e94e22dd..af19b353bb6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1780,7 +1780,7 @@ def _validate_indexers( elif isinstance(v, Dataset): raise TypeError("cannot use a Dataset as an indexer") elif isinstance(v, Sequence) and len(v) == 0: - v = IndexVariable((k,), np.zeros((0,), dtype="int64")) + v = Variable((k,), np.zeros((0,), dtype="int64")) else: v = np.asarray(v) @@ -1794,16 +1794,13 @@ def _validate_indexers( if v.ndim == 0: v = Variable((), v) elif v.ndim == 1: - v = IndexVariable((k,), v) + v = Variable((k,), v) else: raise IndexError( "Unlabeled multi-dimensional array cannot be " "used for indexing: {}".format(k) ) - if v.ndim == 1: - v = v.to_index_variable() - indexers_list.append((k, v)) return 
indexers_list @@ -2366,7 +2363,10 @@ def interp( if kwargs is None: kwargs = {} coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") - indexers = OrderedDict(self._validate_indexers(coords)) + indexers = OrderedDict( + (k, v.to_index_variable() if v.ndim == 1 else v) + for k, v in self._validate_indexers(coords) + ) obj = self if assume_sorted else self.sortby([k for k in coords]) From 5c708b6bb23333842fe7060c982950d9ce0b9551 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 18 Sep 2019 11:14:46 -0700 Subject: [PATCH 2/2] mypy fix --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index af19b353bb6..1de90d72200 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2364,7 +2364,7 @@ def interp( kwargs = {} coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") indexers = OrderedDict( - (k, v.to_index_variable() if v.ndim == 1 else v) + (k, v.to_index_variable() if isinstance(v, Variable) and v.ndim == 1 else v) for k, v in self._validate_indexers(coords) )