diff --git a/xarray/core/combine.py b/xarray/core/combine.py index d139151064b..dce4b948b6a 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -13,8 +13,8 @@ def concat(objs, dim=None, data_vars='all', coords='different', - compat='equals', positions=None, indexers=None, mode=None, - concat_over=None): + compat='equals', positions=None, prealigned=False, + indexers=None, mode=None, concat_over=None): """Concatenate xarray objects along a new or existing dimension. Parameters @@ -66,6 +66,10 @@ def concat(objs, dim=None, data_vars='all', coords='different', List of integer arrays which specifies the integer positions to which to assign each dataset along the concatenated dimension. If not supplied, objects are concatenated in the provided order. + prealigned : bool, optional + If True, the objects will be assumed to be already aligned. Coordinates + will be taken from the first object and ignored from the subsequent + objects. indexers, mode, concat_over : deprecated Returns @@ -117,7 +121,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', else: raise TypeError('can only concatenate xarray Dataset and DataArray ' 'objects, got %s' % type(first_obj)) - return f(objs, dim, data_vars, coords, compat, positions) + return f(objs, dim, data_vars, coords, compat, positions, prealigned) def _calc_concat_dim_coord(dim): @@ -195,7 +199,8 @@ def differs(vname): return concat_over -def _dataset_concat(datasets, dim, data_vars, coords, compat, positions): +def _dataset_concat(datasets, dim, data_vars, coords, compat, positions, + prealigned): """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -207,7 +212,10 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions): dim, coord = _calc_concat_dim_coord(dim) datasets = [as_dataset(ds) for ds in datasets] - datasets = align(*datasets, join='outer', copy=False, exclude=[dim]) + if not prealigned: + datasets = align(*datasets, join='outer', copy=False, 
exclude=[dim]) + else: + coords = 'minimal' concat_over = _calc_concat_over(datasets, dim, data_vars, coords) @@ -228,21 +236,22 @@ def insert_result_variable(k, v): # check that global attributes and non-concatenated variables are fixed # across all datasets - for ds in datasets[1:]: - if (compat == 'identical' and - not utils.dict_equiv(ds.attrs, result_attrs)): - raise ValueError('dataset global attributes not equal') - for k, v in iteritems(ds.variables): - if k not in result_vars and k not in concat_over: - raise ValueError('encountered unexpected variable %r' % k) - elif (k in result_coord_names) != (k in ds.coords): - raise ValueError('%r is a coordinate in some datasets but not ' - 'others' % k) - elif (k in result_vars and k != dim and - not getattr(v, compat)(result_vars[k])): - verb = 'equal' if compat == 'equals' else compat - raise ValueError( - 'variable %r not %s across datasets' % (k, verb)) + if not prealigned: + for ds in datasets[1:]: + if (compat == 'identical' and + not utils.dict_equiv(ds.attrs, result_attrs)): + raise ValueError('dataset global attributes not equal') + for k, v in iteritems(ds.variables): + if k not in result_vars and k not in concat_over: + raise ValueError('encountered unexpected variable %r' % k) + elif (k in result_coord_names) != (k in ds.coords): + raise ValueError('%r is a coordinate in some datasets but not ' + 'others' % k) + elif (k in result_vars and k != dim and + not getattr(v, compat)(result_vars[k])): + verb = 'equal' if compat == 'equals' else compat + raise ValueError( + 'variable %r not %s across datasets' % (k, verb)) # we've already verified everything is consistent; now, calculate # shared dimension sizes so we can expand the necessary variables @@ -284,7 +293,7 @@ def ensure_common_dims(vars): def _dataarray_concat(arrays, dim, data_vars, coords, compat, - positions): + positions, prealigned): arrays = list(arrays) if data_vars != 'all': @@ -303,7 +312,7 @@ def _dataarray_concat(arrays, dim, data_vars, 
coords, compat, datasets.append(arr._to_temp_dataset()) ds = _dataset_concat(datasets, dim, data_vars, coords, compat, - positions) + positions, prealigned) return arrays[0]._from_temp_dataset(ds, name) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 7813378277a..fe1a0e95580 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -123,6 +123,30 @@ def test_concat_autoalign(self): coords={'x': [1, 2, 3]})}) self.assertDatasetIdentical(expected, actual) + def test_concat_prealigned(self): + # concat over new dimension + ds1 = Dataset({'foo': (['x'], [1, 2])}, + coords={'x': (['x'], [1, 2]), 'z': (['x'], ['a', 'b'])}) + ds2 = Dataset({'foo': (['x'], [1, 2])}, + coords={'x': (['x'], [1, 3]), 'z': (['x'], ['f', 'g'])}) + actual = concat([ds1, ds2], 'y', prealigned=True) + # the concatenated dataset should just ignore all coords in ds2 and only + # concat data variables, regardless of whether they are the same + expected = Dataset({'foo': (['y', 'x'], [[1, 2], [1, 2]])}, + coords=ds1.coords) + self.assertDatasetIdentical(expected, actual) + + # concat over existing dimension + data = create_test_data() + for k in list(data): + if 'dim3' in data[k].dims: + del data[k] + + split_data = [data.isel(dim1=slice(3)), + data.isel(dim1=slice(3, None))] + concat_data = concat(split_data, 'dim1', prealigned=True) + self.assertDatasetIdentical(data, concat_data) + def test_concat_errors(self): data = create_test_data() split_data = [data.isel(dim1=slice(3)),