Multi-index indexing #802
Changes from 16 commits
@@ -294,6 +294,60 @@ elements that are fully masked:

    arr2.where(arr2.y < 2, drop=True)

.. _multi-level indexing:

Multi-level indexing
--------------------

The ``loc`` and ``sel`` methods of ``Dataset`` and ``DataArray`` both accept
dictionaries for label-based indexing on multi-index dimensions:

.. ipython:: python

    idx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
                                     names=('one', 'two'))
    da_midx = xr.DataArray(np.random.rand(6, 3),
                           [('x', idx), ('y', range(3))])
    da_midx
    da_midx.sel(x={'one': 'a', 'two': 0})
    da_midx.loc[{'one': 'a'}, ...]

As shown in the last example above, xarray handles partial selection on a
pandas multi-index; it automatically renames the dimension and replaces the
coordinate when a single index is returned (level drop).
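The level drop described above comes from pandas: the diff below calls ``MultiIndex.get_loc_level``, which returns positional indexers together with whatever index survives once the selected level is dropped. A minimal standalone sketch (plain pandas, no xarray needed):

```python
import pandas as pd

# Same index shape as the ipython example above.
idx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
                                 names=('one', 'two'))

# Selecting 'a' on level 'one' yields both the positions of the matching
# rows and the surviving single-level index (level 'one' is dropped).
indexer, new_index = idx.get_loc_level('a', level='one')
print(indexer)               # positions of the 'a' rows: slice(0, 2, None)
print(new_index.tolist())    # remaining 'two' labels: [0, 1]
print(new_index.name)        # 'two'
```

This is the same (indexer, new_index) pair that the new ``convert_label_indexer`` in the diff threads back to the caller.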
Like pandas, it is also possible to slice a multi-indexed dimension by providing
a tuple of multiple indexers (i.e., slices, labels, lists of labels, or any
selector allowed by pandas). Note that for now xarray doesn't fully handle
partial selection in that case (no level drop is done):

.. ipython:: python

    da_midx.sel(x=(list('ab'), [0]))
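This tuple-of-indexers form maps onto pandas' ``MultiIndex.get_locs``, the call used for nested tuples in the indexing.py diff further down. A small standalone illustration:

```python
import pandas as pd

idx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
                                 names=('one', 'two'))

# One selector per level: labels 'a' or 'b' on level one, label 0 on
# level two. get_locs returns the integer positions of every match.
positions = idx.get_locs((['a', 'b'], [0]))
print(positions.tolist())       # [0, 2]
print(idx[positions].tolist())  # [('a', 0), ('b', 0)]
```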
Lists or slices of tuples can be used to select several combinations of
multi-index labels:

.. ipython:: python

    da_midx.sel(x=[('a', 0), ('b', 1)])

A single, flat tuple can be used to select a given combination of
multi-index labels:

.. ipython:: python

    da_midx.sel(x=('a', 0))

Unlike pandas, xarray can't make the distinction between index levels and
dimensions when using ``loc`` in some ambiguous cases. For example, for
``da_midx.loc[{'one': 'a', 'two': 0}]`` and ``da_midx.loc['a', 0]``, xarray
always interprets ``('one', 'two')`` and ``('a', 0)`` as the names and
labels of the 1st and 2nd dimensions, respectively. You must specify all
dimensions or use the ellipsis in the ``loc`` specifier, e.g., in the example
above, ``da_midx.loc[{'one': 'a', 'two': 0}, :]`` or
``da_midx.loc[('a', 0), ...]``.

Review comment: Instead of "can't make the distinction", let's say "does not guess".
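For contrast, pandas resolves a flat tuple against the levels of the row multi-index rather than against separate axes. A small sketch with hypothetical data illustrating the behavior the paragraph compares against:

```python
import pandas as pd

idx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
                                 names=('one', 'two'))
df = pd.DataFrame({'v': range(6)}, index=idx)

# pandas treats ('a', 0) as a single key into the row multi-index,
# not as labels for two different axes.
row = df.loc[('a', 0)]
print(int(row['v']))  # 0
```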
Multi-dimensional indexing
--------------------------
@@ -39,10 +39,14 @@ Enhancements

  attributes are retained in the resampled object. By
  `Jeremy McGibbon <https://github.com/mcgibbon>`_.

- DataArray and Dataset methods :py:meth:`sel` and :py:meth:`loc` now
  accept dictionaries or nested tuples for indexing on multi-index dimensions.
  By `Benoit Bovy <https://github.com/benbovy>`_.

Review comment: Can you also please add a note about the changed behavior (we now drop levels, which is consistent with pandas) in the "Breaking changes" section above? Also, add a reference here to the documentation section you added:

- New (experimental) decorators :py:func:`~xarray.register_dataset_accessor` and
  :py:func:`~xarray.register_dataarray_accessor` for registering custom xarray
  extensions without subclassing. They are described in the new documentation
  page on :ref:`internals`. By `Stephan Hoyer <https://github.com/shoyer>`
  page on :ref:`internals`. By `Stephan Hoyer <https://github.com/shoyer>`_.

- Round trip boolean datatypes. Previously, writing boolean datatypes to netCDF
  formats would raise an error since netCDF does not have a `bool` datatype.
@@ -86,24 +86,22 @@ def __init__(self, data_array):
        self.data_array = data_array

    def _remap_key(self, key):
        def lookup_positions(dim, labels):
            index = self.data_array.indexes[dim]
            return indexing.convert_label_indexer(index, labels)

        if utils.is_dict_like(key):
            return dict((dim, lookup_positions(dim, labels))
                        for dim, labels in iteritems(key))
        else:
        if not utils.is_dict_like(key):
            # expand the indexer so we can handle Ellipsis
            key = indexing.expanded_indexer(key, self.data_array.ndim)
            return tuple(lookup_positions(dim, labels) for dim, labels
                         in zip(self.data_array.dims, key))
            labels = indexing.expanded_indexer(key, self.data_array.ndim)
            key = dict(zip(self.data_array.dims, labels))
        return indexing.remap_label_indexers(self.data_array, key)

    def __getitem__(self, key):
        return self.data_array[self._remap_key(key)]
        pos_indexers, new_indexes = self._remap_key(key)
        ds = self.data_array[pos_indexers]._to_temp_dataset()
        return self.data_array._from_temp_dataset(
            ds._replace_indexes(new_indexes)
        )

Review comment: Can we avoid creating the temporary dataset here?

Reply: Yeah, I agree this is not very nice. I did this to avoid duplicating the

    def __setitem__(self, key, value):
        self.data_array[self._remap_key(key)] = value
        pos_indexers, new_indexes = self._remap_key(key)
        self.data_array[pos_indexers] = value


class _ThisArray(object):

@@ -599,8 +597,10 @@ def sel(self, method=None, tolerance=None, **indexers):
        Dataset.sel
        DataArray.isel
        """
        return self.isel(**indexing.remap_label_indexers(
            self, indexers, method=method, tolerance=tolerance))
        ds = self._to_temp_dataset().sel(
            method=method, tolerance=tolerance, **indexers
        )
        return self._from_temp_dataset(ds)

    def isel_points(self, dim='points', **indexers):
        """Return a new DataArray whose dataset is given by pointwise integer
@@ -419,6 +419,18 @@ def _replace_vars_and_dims(self, variables, coord_names=None,
        obj = self._construct_direct(variables, coord_names, dims, attrs)
        return obj

    def _replace_indexes(self, indexes):
        variables = OrderedDict()
        for k, v in iteritems(self._variables):
            if k in indexes.keys():
                idx = indexes[k]
                variables[k] = Coordinate(idx.name, idx)
            else:
                variables[k] = v
        obj = self._replace_vars_and_dims(variables)
        dim_names = {dim: idx.name for dim, idx in iteritems(indexes)}
        return obj.rename(dim_names)

Review comment: If I think we should create

Review comment: Can we make the rename only done if necessary? I think this can be kind of expensive. Putting things together:

    variables = self._variables.copy()
    for name, idx in indexes.items():
        variables[name] = Coordinate(name, idx)
    obj = self._replace_vars_and_dims(variables)
    # switch from dimension to level names, if necessary
    dim_names = {}
    for dim, idx in indexes.items():
        if idx.name != dim:
            dim_names[dim] = idx.name
    if dim_names:
        obj = obj.rename(dim_names)

Reply: Seems much nicer! What about

at the top of the function, given that in many use cases indexes will be empty? (I don't know if

Reply: Yes, that's even better!
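To make the proposal above concrete, here is a standalone sketch of the conditional-rename idea over plain dicts. The helper name and data shapes are hypothetical; this is not xarray's actual ``_replace_indexes``:

```python
import pandas as pd

def replace_indexes_sketch(variables, indexes):
    # Swap in the new index objects, then compute a rename mapping only
    # for dimensions whose surviving index level has a different name;
    # skipping the rename when nothing changed avoids the expensive call.
    if not indexes:
        return variables, {}
    variables = dict(variables)
    for name, idx in indexes.items():
        variables[name] = idx  # xarray would wrap idx in a Coordinate here
    dim_names = {dim: idx.name for dim, idx in indexes.items()
                 if idx.name != dim}
    return variables, dim_names

# After selecting 'a' on level 'one', dimension 'x' is left with the
# single-level index 'two', so 'x' must be renamed to 'two'.
surviving = pd.Index([0, 1], name='two')
new_vars, renames = replace_indexes_sketch({'x': None, 'y': None},
                                           {'x': surviving})
print(renames)  # {'x': 'two'}
```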
    def copy(self, deep=False):
        """Returns a copy of this dataset.

@@ -954,7 +966,9 @@ def sel(self, method=None, tolerance=None, **indexers):
            Requires pandas>=0.17.
        **indexers : {dim: indexer, ...}
            Keyword arguments with names matching dimensions and values given
            by scalars, slices or arrays of tick labels.
            by scalars, slices or arrays of tick labels. For dimensions with
            multi-index, the indexer may also be a dict-like object with keys
            matching index level names.

        Returns
        -------

@@ -972,8 +986,10 @@ def sel(self, method=None, tolerance=None, **indexers):
        Dataset.isel_points
        DataArray.sel
        """
        return self.isel(**indexing.remap_label_indexers(
            self, indexers, method=method, tolerance=tolerance))
        pos_indexers, new_indexes = indexing.remap_label_indexers(
            self, indexers, method=method, tolerance=tolerance
        )
        return self.isel(**pos_indexers)._replace_indexes(new_indexes)

Review comment: Does this handle the case where

Reply: Nevermind, that can't happen.

    def isel_points(self, dim='points', **indexers):
        """Returns a new dataset with each array indexed pointwise along the

@@ -1114,8 +1130,9 @@ def sel_points(self, dim='points', method=None, tolerance=None,
        Dataset.isel_points
        DataArray.sel_points
        """
        pos_indexers = indexing.remap_label_indexers(
            self, indexers, method=method, tolerance=tolerance)
        pos_indexers, new_indexes = indexing.remap_label_indexers(
            self, indexers, method=method, tolerance=tolerance
        )
        return self.isel_points(dim=dim, **pos_indexers)

Review comment: if we ignore

    def reindex_like(self, other, method=None, tolerance=None, copy=True):
@@ -4,7 +4,7 @@

from . import utils
from .pycompat import iteritems, range, dask_array_type, suppress
from .utils import is_full_slice
from .utils import is_full_slice, is_dict_like


def expanded_indexer(key, ndim):

@@ -135,11 +135,27 @@ def _asarray_tuplesafe(values):
    return result


def _is_nested_tuple(tup, index):
    """Check for a compatible nested tuple and multiindex (taken from
    pandas.core.indexing.is_nested_tuple).
    """
    if not isinstance(tup, tuple):
        return False

    # are we a nested tuple of: tuple, list, slice
    for i, k in enumerate(tup):
        if isinstance(k, (tuple, list, slice)):
            return isinstance(index, pd.MultiIndex)

    return False

Review comment: I'm still trying to wrap my head around exactly what this check does :).

Reply: So I'm ! I've just stolen this from pandas without much modification :).

Review comment: This is such a weird function the way it's currently written. Why not make this:

    def _is_nested_tuple(possible_tuple):
        return (isinstance(possible_tuple, tuple)
                and any(isinstance(value, (tuple, list, slice))
                        for value in possible_tuple))

The
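The simplified predicate suggested in the review (with the ``pd.MultiIndex`` check left to the caller) can be sketched as a runnable standalone function. This is the reviewer's proposal with its parentheses completed, not the code merged in the diff:

```python
def is_nested_tuple(possible_tuple):
    # A tuple indexer is "nested" when any element is itself a tuple,
    # list, or slice, e.g. (['a', 'b'], [0]) -- but not ('a', 0).
    return (isinstance(possible_tuple, tuple) and
            any(isinstance(value, (tuple, list, slice))
                for value in possible_tuple))

print(is_nested_tuple((['a', 'b'], [0])))   # True: contains a list
print(is_nested_tuple((slice(None), 'a')))  # True: contains a slice
print(is_nested_tuple(('a', 0)))            # False: flat labels only
print(is_nested_tuple(['a', 0]))            # False: not a tuple at all
```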
def convert_label_indexer(index, label, index_name='', method=None,
                          tolerance=None):
    """Given a pandas.Index and labels (e.g., from __getitem__) for one
    dimension, return an indexer suitable for indexing an ndarray along that
    dimension
    dimension. If label is a dict-like object and a pandas.MultiIndex is given,
    also return a new pandas.Index, otherwise return None.
    """
    # backwards compatibility for pandas<0.16 (method) or pandas<0.17
    # (tolerance)

@@ -152,6 +168,8 @@ def convert_label_indexer(index, label, index_name='', method=None,
                'the tolerance argument requires pandas v0.17 or newer')
        kwargs['tolerance'] = tolerance

    new_index = None

    if isinstance(label, slice):
        if method is not None or tolerance is not None:
            raise NotImplementedError(

@@ -166,6 +184,17 @@ def convert_label_indexer(index, label, index_name='', method=None,
        raise KeyError('cannot represent labeled-based slice indexer for '
                       'dimension %r with a slice over integer positions; '
                       'the index is unsorted or non-unique')

    elif is_dict_like(label):
        if not isinstance(index, pd.MultiIndex):
            raise ValueError('cannot use a dict-like object for selection on a '
                             'dimension that does not have a MultiIndex')
        indexer, new_index = index.get_loc_level(tuple(label.values()),
                                                 level=tuple(label.keys()))

    elif _is_nested_tuple(label, index):
        indexer = index.get_locs(label)

Review comment: I think we could reproduce what pandas does in terms of collapsing tuple levels if we call:

    # untested!
    elif isinstance(label, tuple) and isinstance(index, pd.MultiIndex):
        if _is_nested_tuple(label):
            indexer = index.get_locs(label)
        else:
            indexer, new_index = index.get_loc_level(label, level=range(len(label)))

Reply: Yes it works! However, using non-nested tuples here consists of selecting single elements and raises the question of how we handle returned scalar values. In that specific case we should drop the dimension but keep the 0-d (multi-level) coordinate so that

More generally, I think we definitely need to carefully address level drop in all cases.

Reply: Doing some tests, it seems like

    Good:
    Bad:
    Good:

Reply: So I guess we need to check the length of the tuple (probably also in the

    elif isinstance(label, tuple) and isinstance(index, pd.MultiIndex):
        if _is_nested_tuple(label):
            indexer = index.get_locs(label)
        elif len(label) == index.nlevels:
            indexer = index.get_loc(label)
        else:
            indexer, new_index = index.get_loc_level(label, level=range(len(label)))

Reply (EDIT: forget about this comment, it is complete nonsense :) ):

    def _maybe_drop_levels(index):
        drop_levels = [i for i, lab in enumerate(index.labels)
                       if not np.ptp(lab.values())]
        if len(drop_levels) < len(index.labels):
            return index.droplevel(drop_levels)
        else:
            return index

    def convert_label_indexer(...):
        # ...
        if isinstance(new_index, pd.MultiIndex):
            new_index = _maybe_drop_levels(new_index)
        return indexer, new_index

Reply: The advantage of doing something like my proposed logic (which I think is similar to what pandas does) is that whether a level is dropped depends only on the indexer type and the number of multi-index levels, as opposed to dropping levels in a way that depends also on the particular values in the indexer and index. Code that depends only on type information rather than values is generally easier to understand and less error prone.

Reply: also,

Reply: Yep, I used

Anyway, I get your logic. It is also much more efficient!

    else:
        label = _asarray_tuplesafe(label)
        if label.ndim == 0:

Review comment: this is where scalars end up -- probably need to add a clause here to handle MultiIndex

@@ -177,18 +206,36 @@ def convert_label_indexer(index, label, index_name='', method=None,
            if np.any(indexer < 0):
                raise KeyError('not all values found in index %r'
                               % index_name)
    return indexer
    return indexer, new_index
def remap_label_indexers(data_obj, indexers, method=None, tolerance=None):
    """Given an xarray data object and label based indexers, return a mapping
    of equivalent location based indexers.
    of equivalent location based indexers. Also return a mapping of pandas'
    single index objects returned from multi-index objects.
    """
    if method is not None and not isinstance(method, str):
        raise TypeError('``method`` must be a string')
    return dict((dim, convert_label_indexer(data_obj[dim].to_index(), label,
                                            dim, method, tolerance))
                for dim, label in iteritems(indexers))

    pos_indexers, new_indexes = {}, {}
    for dim, label in iteritems(indexers):
        index = data_obj[dim].to_index()

        if isinstance(index, pd.MultiIndex):
            # set default names for multi-index unnamed levels so that
            # we can safely rename dimension / coordinate later
            valid_level_names = [name or '{}_level_{}'.format(dim, i)
                                 for i, name in enumerate(index.names)]
            index = index.copy()
            index.names = valid_level_names

Review comment: This looks great! We might also consider moving this logic to around this line of

Reply: Shouldn't it be better to move this logic to

This is because I worry about implicit copy or in-place renaming. The problem would be to set default level names that are unique across dimensions, but maybe we can pass the variable name in the

Reply: Or maybe moving this here (

Reply: It does look like we already have some similar logic in

Reply: Compared to

    >>> idx = pd.MultiIndex.from_product([['a', 'b'], [1, 2], [-1, -2]])
    >>> y = xr.DataArray(np.random.rand(2 * 2 * 2), [('x', idx)])
    >>> y.x.to_index()
    MultiIndex(levels=[['a', 'b'], [1, 2], [-2, -1]],
               labels=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 1, 1, 0, 0, 1, 1], [1, 0, 1, 0, 1, 0, 1, 0]],
               names=['x_level_0', 'x_level_1', 'x_level_2'])
    >>> idx.names = ('one', 'two', 'three')
    >>> y.x.to_index()
    MultiIndex(levels=[['a', 'b'], [1, 2], [-2, -1]],
               labels=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 1, 1, 0, 0, 1, 1], [1, 0, 1, 0, 1, 0, 1, 0]],
               names=['one', 'two', 'three'])

and we also have direct access to the dimension/coordinate name so that we can set default level names that are unique (as shown above).

Reply: True, but unless we allow directly accessing levels as variables,

        idxr, new_idx = convert_label_indexer(index, label,
                                              dim, method, tolerance)
        pos_indexers[dim] = idxr
        if new_idx is not None and not isinstance(new_idx, pd.MultiIndex):
            new_indexes[dim] = new_idx

Review comment: What should happen if

Reply: We definitely need to add a test for this situation (e.g., 3 level index -> 2 level index).

Reply: Yes multi-indexes are not updated, but maybe we should do so (see my comment below on level drop).

    return pos_indexers, new_indexes
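The fallback naming used in the loop above (``'<dim>_level_<i>'`` for unnamed levels) can be exercised on its own. A small sketch, using a hypothetical helper name:

```python
import pandas as pd

def default_level_names(dim, index):
    # Mirror the diff's fallback: keep existing level names, and give
    # unnamed levels a '<dim>_level_<i>' placeholder so the dimension /
    # coordinate can be renamed safely later.
    return [name or '{}_level_{}'.format(dim, i)
            for i, name in enumerate(index.names)]

unnamed = pd.MultiIndex.from_product([['a', 'b'], [0, 1]])
named = pd.MultiIndex.from_product([['a', 'b'], [0, 1]],
                                   names=('one', 'two'))
print(default_level_names('x', unnamed))  # ['x_level_0', 'x_level_1']
print(default_level_names('x', named))    # ['one', 'two']
```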
def slice_slice(old_slice, applied_slice, size):

Review comment (nit): I would use a new sentence instead of the semicolon.