API for N-dimensional combine #2616

Merged: 111 commits, Jun 25, 2019

Changes from 59 commits

Commits
88ee12a
concatenates along a single dimension
TomNicholas Nov 5, 2018
1aaa075
Wrote function to find correct tile_IDs from nested list of datasets
TomNicholas Nov 6, 2018
dbb371d
Wrote function to check that combined_tile_ids structure is valid
TomNicholas Nov 7, 2018
cc4d743
Added test of 2d-concatenation
TomNicholas Nov 7, 2018
d2fc7e7
Tests now check that dataset ordering is correct
TomNicholas Nov 8, 2018
e3f3699
Test concatenation along a new dimension
TomNicholas Nov 8, 2018
55bf685
Started generalising auto_combine to N-D by integrating the N-D conca…
TomNicholas Nov 9, 2018
845206c
All unit tests now passing
TomNicholas Nov 9, 2018
fb66626
Merge branch 'real_master' into feature/nd_combine
TomNicholas Nov 10, 2018
f4e9aad
Fixed a failing test which I didn't notice because I don't have pseud…
TomNicholas Nov 10, 2018
00004a1
Began updating open_mfdataset to handle N-D input
TomNicholas Nov 14, 2018
b41e374
Refactored to remove duplicate logic in open_mfdataset & auto_combine
TomNicholas Nov 14, 2018
8672a79
Implemented Shoyer's suggestion in #2553 to rewrite the recursive nest…
TomNicholas Nov 14, 2018
4f56b24
--amend
TomNicholas Nov 14, 2018
4cfaf2e
Now raises ValueError if input not ordered correctly before concatena…
TomNicholas Nov 14, 2018
9fd1413
Added some more prototype tests defining desired behaviour more clearly
TomNicholas Nov 22, 2018
8ad0121
Now raises informative errors on invalid forms of input
TomNicholas Nov 24, 2018
4b2c544
Refactoring to also merge along each dimension
TomNicholas Nov 25, 2018
3d0061e
Refactored to literally just apply the old auto_combine along each di…
TomNicholas Nov 25, 2018
60c93ba
Added unit tests for open_mfdataset
TomNicholas Nov 26, 2018
1824538
Removed TODOs
TomNicholas Nov 26, 2018
d380815
Removed format strings
TomNicholas Nov 30, 2018
c4bb8d0
test_get_new_tile_ids now doesn't assume dicts are ordered
TomNicholas Nov 30, 2018
6b7f889
Fixed failing tests on python3.5 caused by accidentally assuming dict…
TomNicholas Nov 30, 2018
58a3648
Test for getting new tile id
TomNicholas Nov 30, 2018
a12a34a
Fixed itertoolz import so that it's compatible with older versions
TomNicholas Nov 30, 2018
ada1f4a
Increased test coverage
TomNicholas Dec 1, 2018
ef0a30e
Added toolz as an explicit dependency to pass tests on python2.7
TomNicholas Dec 1, 2018
3be70bc
Updated 'what's new'
TomNicholas Dec 1, 2018
f266bc3
No longer attempts to shortcut all concatenation at once if concat_di…
TomNicholas Dec 1, 2018
cf49c2b
Merge branch 'master' into feature/nd_combine
TomNicholas Dec 1, 2018
878e1f9
Rewrote using itertools.groupby instead of toolz.itertoolz.groupby to…
TomNicholas Dec 1, 2018
7dea14f
Merged changes from master
TomNicholas Dec 1, 2018
e6f25a3
Fixed erroneous removal of utils import
TomNicholas Dec 1, 2018
f856485
Updated docstrings to include an example of multidimensional concaten…
TomNicholas Dec 2, 2018
6305d83
Clarified auto_combine docstring for N-D behaviour
TomNicholas Dec 5, 2018
ce59da1
Added unit test for nested list of Datasets with different variables
TomNicholas Dec 10, 2018
9fb34cf
Minor spelling and pep8 fixes
TomNicholas Dec 10, 2018
83dedb3
Started working on a new api with both auto_combine and manual_combine
TomNicholas Dec 11, 2018
de199a0
Merged master
TomNicholas Dec 17, 2018
3e64a83
Wrote basic function to infer concatenation order from coords.
TomNicholas Jan 3, 2019
963c794
Attempt at finalised version of public-facing API.
TomNicholas Jan 4, 2019
1a66530
No longer uses entire old auto_combine internally, only concat or merge
TomNicholas Jan 4, 2019
38d265e
Merged v0.11.1 and v0.11.2 changes
TomNicholas Jan 4, 2019
7525b23
Updated what's new
TomNicholas Jan 4, 2019
92e120a
Removed unneeded addition to what's new for old release
TomNicholas Jan 4, 2019
13a7f75
Fixed incomplete merge in docstring for open_mfdataset
TomNicholas Jan 4, 2019
b76e681
Tests for manual combine passing
TomNicholas Jan 6, 2019
c09df8b
Tests for auto_combine now passing
TomNicholas Jan 6, 2019
953d572
xfailed weird behaviour with manual_combine trying to determine conca…
TomNicholas Jan 6, 2019
b7bf1ad
Add auto_combine and manual_combine to API page of docs
TomNicholas Jan 6, 2019
855d819
Tests now passing for open_mfdataset
TomNicholas Jan 6, 2019
de7965e
Attempted to merge master in, but #2648 has stumped me
TomNicholas Jan 6, 2019
bfcb4e3
Completed merge so that #2648 is respected, and added tests.
TomNicholas Jan 7, 2019
eb053cc
Separated the tests for concat and both combines
TomNicholas Jan 7, 2019
97e508c
Some PEP8 fixes
TomNicholas Jan 7, 2019
410b138
Pre-empting a test which will fail with opening uamiv format
TomNicholas Jan 7, 2019
02b6d05
Satisfy pep8speaks bot
TomNicholas Jan 7, 2019
0d6f13a
Python 3.5 compatible after changing some error string formatting
TomNicholas Jan 7, 2019
18e0074
Order coords using pandas.Index objects
TomNicholas Jan 7, 2019
67f11f3
Fixed performance bug from GH #2662
TomNicholas Jan 15, 2019
3b843f5
Removed ToDos about natural sorting of string coords
TomNicholas Jan 23, 2019
540d3d4
Merged master into branch
TomNicholas Jan 23, 2019
bb98d54
Generalized auto_combine to handle monotonically-decreasing coords too
TomNicholas Jan 24, 2019
e3f7523
Added more examples to docstring for manual_combine
TomNicholas Jan 28, 2019
fc36b74
Merged master - includes py2 deprecation
TomNicholas Jan 28, 2019
d96595e
Added note about globbing aspect of open_mfdataset
TomNicholas Jan 28, 2019
79f09c0
Removed auto-inferring of concatenation dimension in manual_combine
TomNicholas Jan 28, 2019
e32adb3
Added example to docstring for auto_combine
TomNicholas Jan 28, 2019
da4d605
Minor correction to docstring
TomNicholas Jan 28, 2019
c4fe22c
Another very minor docstring correction
TomNicholas Jan 28, 2019
66b4c4f
Added test to guard against issue #2777
TomNicholas Feb 27, 2019
90f0c1d
Started deprecation cycle for auto_combine
TomNicholas Mar 2, 2019
0990dd4
Fully reverted open_mfdataset tests
TomNicholas Mar 3, 2019
d6277be
Updated what's new to match deprecation cycle
TomNicholas Mar 3, 2019
b81e77a
Merge branch 'real_master' into feature/nd_combine_new_api
TomNicholas Mar 3, 2019
bf7d549
Reverted uamiv test
TomNicholas Mar 3, 2019
f00770f
Removed dependency on itertools
TomNicholas Mar 3, 2019
c7c1746
Deprecation tests fixed
TomNicholas Mar 3, 2019
f6192ca
Satisfy pycodestyle
TomNicholas Mar 3, 2019
88f089e
Started deprecation cycle of auto_combine
TomNicholas Mar 18, 2019
2849559
merged changes from master for v0.12
TomNicholas Mar 18, 2019
535bc31
Added specific error for edge case combine_manual can't handle
TomNicholas Mar 18, 2019
5d818e0
Check that global coordinates are monotonic
TomNicholas Mar 18, 2019
42cd05d
Highlighted weird behaviour when concatenating with no data variables
TomNicholas Mar 18, 2019
8a83814
Added test for impossible-to-auto-combine coordinates
TomNicholas Mar 18, 2019
e4acbdc
Removed unneeded test
TomNicholas Mar 18, 2019
8e767e2
Satisfy linter
TomNicholas Mar 18, 2019
3d04112
Added airspeedvelocity benchmark for combining functions
TomNicholas Mar 18, 2019
06ecef6
Benchmark will take longer now
TomNicholas Mar 18, 2019
513764f
Updated version numbers in deprecation warnings to fit with recent re…
TomNicholas Mar 18, 2019
13364ff
Updated api docs for new function names
TomNicholas May 18, 2019
ddfc6dd
Fixed docs build failure
TomNicholas May 18, 2019
e471a42
Revert "Fixed docs build failure"
TomNicholas May 19, 2019
2d5b90f
Updated documentation with section explaining new functions
TomNicholas May 19, 2019
8cbf5e1
Merged master
TomNicholas May 19, 2019
9ead34e
Suppressed deprecation warnings in test suite
TomNicholas May 20, 2019
fab3586
Resolved ToDo by pointing to issue with concat, see #2975
TomNicholas May 20, 2019
9d5e29f
Various docs fixes
TomNicholas May 20, 2019
9a33ac6
Merged master, resolving conflicts with #2964
TomNicholas May 28, 2019
ae7b811
Slightly renamed tests to match new name of tested function
TomNicholas May 28, 2019
f4fc03d
Included minor suggestions from shoyer
TomNicholas May 28, 2019
917ebee
Removed trailing whitespace
TomNicholas May 28, 2019
1e537ba
Simplified error message for case combine_manual can't handle
TomNicholas May 29, 2019
7d6845b
Removed filter for deprecation warnings, and added test for if user d…
TomNicholas May 29, 2019
5083471
Simple fixes suggested by shoyer
TomNicholas Jun 21, 2019
4cc70ae
Change deprecation warning behaviour
TomNicholas Jun 21, 2019
537c405
Merged in recent changes to master
TomNicholas Jun 21, 2019
2f54127
Merge branch 'master' into feature/nd_combine_new_api
dcherian Jun 25, 2019
357531f
linting
TomNicholas Jun 25, 2019
e006875
Merge branch 'feature/nd_combine_new_api' of https://github.com/TomNi…
TomNicholas Jun 25, 2019
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -19,6 +19,8 @@ Top-level functions
   broadcast
   concat
   merge
   auto_combine
   manual_combine
   where
   set_options
   full_like
17 changes: 17 additions & 0 deletions doc/whats-new.rst
@@ -25,6 +25,23 @@ Breaking changes
  Python 3 only. (:issue:`1876`).
  By `Joe Hamman <https://github.com/jhamman>`_.


- Combining datasets along N dimensions:

  - ``open_mfdataset`` and ``auto_combine`` can now combine datasets along
    any number of dimensions, instead of just a one-dimensional list of
    datasets.

    If the datasets have monotonic global dimension coordinates then the new
    ``auto_combine`` should be used. If not, then the new ``manual_combine``
    will accept the datasets as a nested list-of-lists, and combine them by
    applying a series of concat and merge operations.

    Breaking because some lists that were previously valid inputs to
    ``open_mfdataset`` and ``auto_combine`` may no longer be valid, and
    should now be combined explicitly using ``manual_combine`` instead.
    (:issue:`2159`) By `Tom Nicholas <http://github.com/TomNicholas>`_.
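
  A minimal sketch of the two functions this entry describes (the filenames
  are hypothetical, and the call signatures are assumed from this entry and
  the docstrings elsewhere in this PR)::

      import xarray as xr

      # Four files laid out in a 2x2 grid along dimensions 'x' and 'y'
      grid = [[xr.open_dataset('x0y0.nc'), xr.open_dataset('x0y1.nc')],
              [xr.open_dataset('x1y0.nc'), xr.open_dataset('x1y1.nc')]]

      # manual_combine trusts the order of the nested list, outer dim first
      combined = xr.manual_combine(grid, concat_dim=['x', 'y'])

      # auto_combine instead orders the datasets by inspecting their
      # (monotonic) dimension coordinates, so a flat list suffices
      combined = xr.auto_combine([ds for row in grid for ds in row])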


Enhancements
~~~~~~~~~~~~

3 changes: 2 additions & 1 deletion xarray/__init__.py
@@ -9,7 +9,8 @@

from .core.alignment import align, broadcast, broadcast_arrays
from .core.common import full_like, zeros_like, ones_like
from .core.concat import concat
from .core.combine import auto_combine, manual_combine
from .core.computation import apply_ufunc, dot, where
from .core.extensions import (register_dataarray_accessor,
                              register_dataset_accessor)
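
From a user's perspective the public names all remain importable from the
top-level namespace; a quick check (assuming this branch is installed):

    import xarray as xr

    # concat now lives in xarray.core.concat, and manual_combine is new,
    # but all three are re-exported at the top level
    print(xr.concat, xr.auto_combine, xr.manual_combine)
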
119 changes: 64 additions & 55 deletions xarray/backends/api.py
@@ -8,12 +8,13 @@

import numpy as np

from .. import Dataset, DataArray, backends, conventions
from ..core import indexing
from .. import auto_combine
from ..core.combine import (_manual_combine, _CONCAT_DIM_DEFAULT,
                            _infer_concat_order_from_positions)
from ..core.pycompat import basestring, path_type
from ..core.utils import (close_on_error, is_grib_path, is_remote_uri)
from .common import ArrayWriter
from .locks import _get_scheduler

@@ -487,35 +488,42 @@ def close(self):
def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
                   compat='no_conflicts', preprocess=None, engine=None,
                   lock=None, data_vars='all', coords='different',
                   combine='auto', autoclose=None, parallel=False, **kwargs):
"""Open multiple files as a single dataset.

If combine='auto' then the function `auto_combine` is used to combine the
datasets into one before returning the result, and if combine='manual' then
`manual_combine` is used. The filepaths must be structured according to
which combining function is used, the details of which are given in the
documentation for ``auto_combine`` and ``manual_combine``.
Requires dask to be installed. See documentation for details on dask [1].
Attributes from the first dataset file are used for the combined dataset.

Parameters
----------
paths : str or sequence
        Either a string glob in the form "path/to/my/files/*.nc" or an
        explicit list of files to open. Paths can be given as strings or as
        pathlib Paths. If concatenation along more than one dimension is
        desired, then ``paths`` must be a nested list-of-lists (see
        ``manual_combine`` for details).
    chunks : int or dict, optional
        Dictionary with keys given by dimension names and values given by
        chunk sizes. In general, these should divide the dimensions of each
        dataset. If int, chunk each dimension by ``chunks``. By default,
        chunks will be chosen to load entire input files into memory at once.
        This has a major impact on performance: please see the full
        documentation for more details [2].
    concat_dim : str, or list of str, DataArray, Index or None, optional
        Dimensions to concatenate files along. You only need to provide this
        argument if any of the dimensions along which you want to concatenate
        is not a dimension in the original datasets, e.g., if you want to
        stack a collection of 2D arrays along a third dimension. By default,
        xarray attempts to infer this argument by examining component files.
        Set ``concat_dim=[..., None, ...]`` explicitly to disable
        concatenation along a particular dimension.
    compat : {'identical', 'equals', 'broadcast_equals',
              'no_conflicts'}, optional
        String indicating how to compare variables of the same name for
        potential conflicts when merging:
        * 'broadcast_equals': all values must be equal when variables are
@@ -542,20 +550,18 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
        active dask scheduler.
    data_vars : {'minimal', 'different', 'all' or list of str}, optional
        These data variables will be concatenated together:
        * 'minimal': Only data variables in which the dimension already
          appears are included.
        * 'different': Data variables which are not equal (ignoring
          attributes) across all datasets are also concatenated (as well as
          all for which dimension already appears). Beware: this option may
          load the data payload of data variables into memory if they are not
          already loaded.
        * 'all': All data variables will be concatenated.
        * list of str: The listed data variables will be concatenated, in
          addition to the 'minimal' data variables.
    coords : {'minimal', 'different', 'all' or list of str}, optional
        These coordinate variables will be concatenated together:
        * 'minimal': Only coordinates in which the dimension already appears
          are included.
        * 'different': Coordinates which are not equal (ignoring attributes)
@@ -570,6 +576,9 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
    parallel : bool, optional
        If True, the open and preprocess steps of this function will be
        performed in parallel using ``dask.delayed``. Default is False.
    combine : {'auto', 'manual'}, optional
        Whether ``xarray.auto_combine`` or ``xarray.manual_combine`` is used
        to combine all the data. Default is 'auto'.
    **kwargs : optional
        Additional arguments passed on to :py:func:`xarray.open_dataset`.

@@ -580,6 +589,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
    See Also
    --------
    auto_combine
    manual_combine
    open_dataset

    References
@@ -601,22 +611,15 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
    if not paths:
        raise IOError('no files to open')

    # If combine='auto' then this is unnecessary, but quick.
    # If combine='manual' then this creates a flat list which is easier to
    # iterate over, while saving the originally-supplied structure as "ids"
    if combine == 'manual':
        if concat_dim is not _CONCAT_DIM_DEFAULT:
            if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
                concat_dim = [concat_dim]
    combined_ids_paths, concat_dims = _infer_concat_order_from_positions(
        paths, concat_dim)
    ids, paths = (
        list(combined_ids_paths.keys()), list(combined_ids_paths.values()))

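For orientation, a conceptual sketch of the "ids" bookkeeping referred to
above (a simplified stand-in written for this review, not the actual helper
in xarray.core.combine):

    def infer_positions(nested):
        """Map each element of a 2-level nested list to a (row, col) tile ID."""
        return {(i, j): ds
                for i, row in enumerate(nested)
                for j, ds in enumerate(row)}

    print(infer_positions([['a', 'b'], ['c', 'd']]))
    # {(0, 0): 'a', (0, 1): 'b', (1, 0): 'c', (1, 1): 'd'}
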
@@ -644,18 +647,24 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
    # the underlying datasets will still be stored as dask arrays
    datasets, file_objs = dask.compute(datasets, file_objs)

    # Combine all datasets, closing them in case of a ValueError
    try:
        if combine == 'auto':
            # Will redo ordering from coordinates, ignoring how they were
            # ordered previously
            if concat_dim is not _CONCAT_DIM_DEFAULT:
                raise ValueError("Cannot specify dimensions to concatenate "
                                 "along when auto-combining")

            combined = auto_combine(datasets, compat=compat,
                                    data_vars=data_vars, coords=coords)
        else:
            # Combine the nested list by successive concat and merge
            # operations along each dimension, using the structure given
            # by "ids"
            combined = _manual_combine(datasets, concat_dims=concat_dim,
                                       compat=compat, data_vars=data_vars,
                                       coords=coords, ids=ids)
    except ValueError:
        for ds in datasets:
            ds.close()
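
Putting the new open_mfdataset signature together, a hedged usage sketch
(the file layout is hypothetical; parameter names are as in the docstring
above):

    import xarray as xr

    # 2D grid of files: outer list along 't', inner lists along 'x'.
    # Per the docstring, concat_dim=[None, 'x'] would instead disable
    # concatenation along the outer dimension.
    paths = [['t0_x0.nc', 't0_x1.nc'],
             ['t1_x0.nc', 't1_x1.nc']]
    ds = xr.open_mfdataset(paths, combine='manual', concat_dim=['t', 'x'])

    # With monotonic dimension coordinates, a flat glob plus the default
    # combine='auto' infers the order instead (note that passing concat_dim
    # together with combine='auto' raises a ValueError)
    ds = xr.open_mfdataset('all_files_*.nc', combine='auto')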