From f7720baa659a05946e0a1d20fdf2488513819eaa Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 19:56:16 -0800 Subject: [PATCH 01/13] Basic support for MultiIndex --- xray/core/indexing.py | 54 ++++++++++++++++++++++++------------- xray/core/utils.py | 7 +++++ xray/core/variable.py | 36 +++++++------------------ xray/test/test_dataarray.py | 15 ++++++++--- xray/test/test_indexing.py | 11 ++++++++ xray/test/test_variable.py | 6 +++++ 6 files changed, 80 insertions(+), 49 deletions(-) diff --git a/xray/core/indexing.py b/xray/core/indexing.py index 277428966a5..d03d110b65c 100644 --- a/xray/core/indexing.py +++ b/xray/core/indexing.py @@ -117,6 +117,24 @@ def _try_get_item(x): return x +def _asarray_tuplesafe(values): + """ + Convert values into a numpy array of at most 1-dimension, while preserving + tuples. + + Adapted from pandas.core.common._asarray_tuplesafe + """ + if isinstance(values, tuple): + result = utils.tuple_to_0darray(values) + else: + result = np.asarray(values) + if result.ndim == 2: + result = np.empty(len(values), dtype=object) + result[:] = values + + return result + + def convert_label_indexer(index, label, index_name='', method=None, tolerance=None): """Given a pandas.Index (or xray.Coordinate) and labels (e.g., from @@ -149,9 +167,9 @@ def convert_label_indexer(index, label, index_name='', method=None, 'dimension %r with a slice over integer positions; ' 'the index is unsorted or non-unique') else: - label = np.asarray(label) + label = _asarray_tuplesafe(label) if label.ndim == 0: - indexer = index.get_loc(np.asscalar(label), **kwargs) + indexer = index.get_loc(label.item(), **kwargs) elif label.dtype.kind == 'b': indexer, = np.nonzero(label) else: @@ -399,22 +417,22 @@ def __getitem__(self, key): # objects don't like tuples) key, = key - if isinstance(key, (int, np.integer)): - value = self.array[key] - if value is pd.NaT: - # work around the impossibility of casting NaT with asarray - # note: it probably would be better in 
general to return - # pd.Timestamp rather np.than datetime64 but this is easier - # (for now) - value = np.datetime64('NaT', 'ns') - elif isinstance(value, timedelta): - value = np.timedelta64(getattr(value, 'value', value), 'ns') - else: - value = np.asarray(value, dtype=self.dtype) - else: - value = PandasIndexAdapter(self.array[key], dtype=self.dtype) - - return value + result = self.array[key] + + if isinstance(result, pd.Index): + result = PandasIndexAdapter(result, dtype=self.dtype) + elif result is pd.NaT: + # work around the impossibility of casting NaT with asarray + # note: it probably would be better in general to return + # pd.Timestamp rather np.than datetime64 but this is easier + # (for now) + result = np.datetime64('NaT', 'ns') + elif isinstance(result, timedelta): + result = np.timedelta64(getattr(result, 'value', result), 'ns') + elif self.dtype != object: + result = np.asarray(result, dtype=self.dtype) + + return result def __repr__(self): return ('%s(array=%r, dtype=%r)' diff --git a/xray/core/utils.py b/xray/core/utils.py index 660a69af211..2ea84da9e02 100644 --- a/xray/core/utils.py +++ b/xray/core/utils.py @@ -178,6 +178,13 @@ def is_valid_numpy_dtype(dtype): return True +def tuple_to_0darray(value): + result = np.empty((1,), dtype=object) + result[:] = [value] + result.shape = () + return result + + def dict_equiv(first, second, compat=equivalent): """Test equivalence of two dict-like objects. If any of the values are numpy arrays, compare them correctly. 
diff --git a/xray/core/variable.py b/xray/core/variable.py index 405bb906d52..64f0d8b0c15 100644 --- a/xray/core/variable.py +++ b/xray/core/variable.py @@ -105,15 +105,15 @@ def as_compatible_data(data, fastpath=False): return data if isinstance(data, pd.Index): - if isinstance(data, pd.MultiIndex): - raise NotImplementedError( - 'no support yet for using a pandas.MultiIndex in an ' - 'xray.Coordinate') return _maybe_wrap_data(data) + if isinstance(data, tuple): + data = utils.tuple_to_0darray(data) + if isinstance(data, pd.Timestamp): # TODO: convert, handle datetime objects, too data = np.datetime64(data.value, 'ns') + if isinstance(data, timedelta): data = np.timedelta64(getattr(data, 'value', data), 'ns') @@ -250,7 +250,7 @@ def data(self, data): self._data = data def _data_cached(self): - if not isinstance(self._data, np.ndarray): + if not isinstance(self._data, (np.ndarray, PandasIndexAdapter)): self._data = np.asarray(self._data) return self._data @@ -989,9 +989,10 @@ def to_index(self): # n.b. 
creating a new pandas.Index from an old pandas.Index is # basically free as pandas.Index objects are immutable assert self.ndim == 1 - return pd.Index(self._data_cached().array, name=self.dims[0]) - - # pandas.Index like properties: + index = self._data_cached().array + if not isinstance(index, pd.MultiIndex): + index = index.set_names(self.name) + return index @property def name(self): @@ -1001,25 +1002,6 @@ def name(self): def name(self, value): raise AttributeError('cannot modify name of Coordinate in-place') - def get_indexer(self, label): - return self.to_index().get_indexer(label) - - def slice_indexer(self, start=None, stop=None, step=None): - return self.to_index().slice_indexer(start, stop, step) - - def slice_locs(self, start=None, stop=None): - return self.to_index().slice_locs(start, stop) - - def get_loc(self, label): - return self.to_index().get_loc(label) - - @property - def is_monotonic(self): - return self.to_index().is_monotonic - - def is_numeric(self): - return self.to_index().is_numeric() - def _unified_dims(variables): # validate dimensions diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 854069ce7fb..1e2a9daf386 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -215,10 +215,6 @@ def test_constructor_from_self_described(self): actual = DataArray(Coordinate('foo', ['a', 'b'])) self.assertDataArrayIdentical(expected, actual) - s = pd.Series(range(2), pd.MultiIndex.from_product([['a', 'b'], [0]])) - with self.assertRaisesRegexp(NotImplementedError, 'MultiIndex'): - DataArray(s) - def test_constructor_from_0d(self): expected = Dataset({None: ([], 0)})[None] actual = DataArray(0) @@ -481,6 +477,17 @@ def test_loc_single_boolean(self): self.assertEqual(data.loc[True], 0) self.assertEqual(data.loc[False], 1) + def test_multiindex(self): + idx = pd.MultiIndex.from_product([list('abc'), [0, 1]]) + data = DataArray(range(6), [('x', idx)]) + + self.assertDataArrayIdentical(data.sel(x=('a', 0)), 
data.isel(x=0)) + self.assertDataArrayIdentical(data.sel(x=('c', 1)), data.isel(x=-1)) + self.assertDataArrayIdentical(data.sel(x=[('a', 0)]), data.isel(x=[0])) + self.assertDataArrayIdentical(data.sel(x=[('a', 0), ('c', 1)]), + data.isel(x=[0, -1])) + self.assertDataArrayIdentical(data.sel(x='a'), data.isel(x=slice(2))) + def test_time_components(self): dates = pd.date_range('2000-01-01', periods=10) da = DataArray(np.arange(1, 11), [('time', dates)]) diff --git a/xray/test/test_indexing.py b/xray/test/test_indexing.py index 51d5862d9f5..3b335204302 100644 --- a/xray/test/test_indexing.py +++ b/xray/test/test_indexing.py @@ -67,6 +67,17 @@ def test_orthogonal_indexer(self): with self.assertRaisesRegexp(ValueError, 'invalid subkey'): print(indexing.orthogonal_indexer((1.5 * y, 1.5 * y), x.shape)) + def test_asarray_tuplesafe(self): + res = indexing._asarray_tuplesafe(('a', 1)) + assert isinstance(res, np.ndarray) + assert res.ndim == 0 + assert res.item() == ('a', 1) + + res = indexing._asarray_tuplesafe([(0,), (1,)]) + assert res.shape == (2,) + assert res[0] == (0,) + assert res[1] == (1,) + def test_convert_label_indexer(self): # TODO: add tests that aren't just for edge cases index = pd.Index([1, 2, 3]) diff --git a/xray/test/test_variable.py b/xray/test/test_variable.py index 272816ca432..48a541dba86 100644 --- a/xray/test/test_variable.py +++ b/xray/test/test_variable.py @@ -416,6 +416,12 @@ def test_pandas_datetime64_with_tz(self): # pandas is new enough that it has datetime64 with timezone dtype assert v.dtype == 'object' + def test_multiindex(self): + idx = pd.MultiIndex.from_product([list('abc'), [0, 1]]) + v = self.cls('x', idx) + self.assertVariableIdentical(Variable((), ('a', 0)), v[0]) + self.assertVariableIdentical(v, v[:]) + class TestVariable(TestCase, VariableSubclassTestCases): cls = staticmethod(Variable) From 356424663d39cf3f7615f4f57e4c87b05b92ba23 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 18:04:55 -0800 Subject: 
[PATCH 02/13] Stack xray.Variable dimensions --- xray/core/variable.py | 46 ++++++++++++++++++++++++++++++++++++++ xray/test/test_variable.py | 22 ++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/xray/core/variable.py b/xray/core/variable.py index 64f0d8b0c15..bb32c5788a2 100644 --- a/xray/core/variable.py +++ b/xray/core/variable.py @@ -720,6 +720,52 @@ def expand_dims(self, dims, shape=None): self._encoding, fastpath=True) return expanded_var.transpose(*dims) + def _stack_once(self, dims, new_dim): + if not set(dims) <= set(self.dims): + raise ValueError('invalid existing dimensions: %s' % dims) + + if new_dim in self.dims: + raise ValueError('cannot create a new dimension with the same ' + 'name as an existing dimension') + + if len(dims) == 0: + # don't stack + return self.copy(deep=False) + + other_dims = [d for d in self.dims if d not in dims] + new_dim_order = other_dims + list(dims) + reordered = self.transpose(*new_dim_order) + + new_shape = reordered.shape[:len(other_dims)] + (-1,) + new_data = reordered.data.reshape(new_shape) + new_dims = reordered.dims[:len(other_dims)] + (new_dim,) + + return Variable(new_dims, new_data, self._attrs, self._encoding, + fastpath=True) + + def stack(self, **dimensions): + """ + Stack any number of existing dimensions into new dimensions. + + New dimensions will be added at the end, and the order of the data + along each new dimension will be in contiguous (C) order. + + Parameters + ---------- + **dimensions : keyword arguments of the form new_name=(dim1, dim2, ...) + Names of new dimensions, and the existing dimensions that they + replace. + + Returns + ------- + stack : Variable + Variable with the same attributes but stacked data. 
+ """ + result = self + for new_dim, dims in dimensions.items(): + result = result._stack_once(dims, new_dim) + return result + def fillna(self, value): return self._fillna(value) diff --git a/xray/test/test_variable.py b/xray/test/test_variable.py index 48a541dba86..00ed3d05ae2 100644 --- a/xray/test/test_variable.py +++ b/xray/test/test_variable.py @@ -760,6 +760,28 @@ def test_expand_dims(self): with self.assertRaisesRegexp(ValueError, 'must be a superset'): v.expand_dims(['z']) + def test_stack(self): + v = Variable(['x', 'y'], [[0, 1], [2, 3]], {'foo': 'bar'}) + actual = v.stack(z=('x', 'y')) + expected = Variable('z', [0, 1, 2, 3], v.attrs) + self.assertVariableIdentical(actual, expected) + + actual = v.stack(z=('x',)) + expected = Variable(('y', 'z'), v.data.T, v.attrs) + self.assertVariableIdentical(actual, expected) + + actual = v.stack(z=(),) + self.assertVariableIdentical(actual, v) + + actual = v.stack(X=('x',), Y=('y',)).transpose('X', 'Y') + expected = Variable(('X', 'Y'), v.data, v.attrs) + self.assertVariableIdentical(actual, expected) + + with self.assertRaisesRegexp(ValueError, 'invalid existing dim'): + v.stack(z=('x1',)) + with self.assertRaisesRegexp(ValueError, 'cannot create a new dim'): + v.stack(x=('x',)) + def test_broadcasting_math(self): x = np.random.randn(2, 3) v = Variable(['a', 'b'], x) From 26deef132d0fbde83ee35c20663021fc062b64e1 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 19:48:07 -0800 Subject: [PATCH 03/13] Add Variable.unstack --- xray/core/variable.py | 66 +++++++++++++++++++++++++++++++++++--- xray/test/test_variable.py | 42 ++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/xray/core/variable.py b/xray/core/variable.py index bb32c5788a2..dee02e1ba05 100644 --- a/xray/core/variable.py +++ b/xray/core/variable.py @@ -733,8 +733,8 @@ def _stack_once(self, dims, new_dim): return self.copy(deep=False) other_dims = [d for d in self.dims if d not in dims] - new_dim_order 
= other_dims + list(dims) - reordered = self.transpose(*new_dim_order) + dim_order = other_dims + list(dims) + reordered = self.transpose(*dim_order) new_shape = reordered.shape[:len(other_dims)] + (-1,) new_data = reordered.data.reshape(new_shape) @@ -745,7 +745,7 @@ def _stack_once(self, dims, new_dim): def stack(self, **dimensions): """ - Stack any number of existing dimensions into new dimensions. + Stack any number of existing dimensions into a single new dimension. New dimensions will be added at the end, and the order of the data along each new dimension will be in contiguous (C) order. @@ -758,14 +758,72 @@ def stack(self, **dimensions): Returns ------- - stack : Variable + stacked : Variable Variable with the same attributes but stacked data. + + See also + -------- + Variable.unstack """ result = self for new_dim, dims in dimensions.items(): result = result._stack_once(dims, new_dim) return result + def _unstack_once(self, dims, old_dim): + new_dim_names = tuple(dims.keys()) + new_dim_sizes = tuple(dims.values()) + + if old_dim not in self.dims: + raise ValueError('invalid existing dimension: %s' % old_dim) + + if set(new_dim_names).intersection(self.dims): + raise ValueError('cannot create a new dimension with the same ' + 'name as an existing dimension') + + axis = self.get_axis_num(old_dim) + if np.prod(new_dim_sizes) != self.shape[axis]: + raise ValueError('the product of the new dimension sizes must ' + 'equal the size of the old dimension') + + other_dims = [d for d in self.dims if d != old_dim] + dim_order = other_dims + [old_dim] + reordered = self.transpose(*dim_order) + + new_shape = reordered.shape[:len(other_dims)] + new_dim_sizes + new_data = reordered.data.reshape(new_shape) + new_dims = reordered.dims[:len(other_dims)] + new_dim_names + + return Variable(new_dims, new_data, self._attrs, self._encoding, + fastpath=True) + + def unstack(self, **dimensions): + """ + Unstack an existing dimensions into multiple new dimensions. 
+ + New dimensions will be added at the end, and the order of the data + along each new dimension will be in contiguous (C) order. + + Parameters + ---------- + **dimensions : keyword arguments of the form old_dim={dim1: size1, ...} + Names of existing dimensions, and the new dimensions and sizes that they + map to. + + Returns + ------- + unstacked : Variable + Variable with the same attributes but unstacked data. + + See also + -------- + Variable.stack + """ + result = self + for old_dim, dims in dimensions.items(): + result = result._unstack_once(dims, old_dim) + return result + def fillna(self, value): return self._fillna(value) diff --git a/xray/test/test_variable.py b/xray/test/test_variable.py index 00ed3d05ae2..501376b2d3a 100644 --- a/xray/test/test_variable.py +++ b/xray/test/test_variable.py @@ -777,11 +777,53 @@ def test_stack(self): expected = Variable(('X', 'Y'), v.data, v.attrs) self.assertVariableIdentical(actual, expected) + def test_stack_errors(self): + v = Variable(['x', 'y'], [[0, 1], [2, 3]], {'foo': 'bar'}) + with self.assertRaisesRegexp(ValueError, 'invalid existing dim'): v.stack(z=('x1',)) with self.assertRaisesRegexp(ValueError, 'cannot create a new dim'): v.stack(x=('x',)) + def test_unstack(self): + v = Variable('z', [0, 1, 2, 3], {'foo': 'bar'}) + actual = v.unstack(z=OrderedDict([('x', 2), ('y', 2)])) + expected = Variable(('x', 'y'), [[0, 1], [2, 3]], v.attrs) + self.assertVariableIdentical(actual, expected) + + actual = v.unstack(z=OrderedDict([('x', 4), ('y', 1)])) + expected = Variable(('x', 'y'), [[0], [1], [2], [3]], v.attrs) + self.assertVariableIdentical(actual, expected) + + actual = v.unstack(z=OrderedDict([('x', 4)])) + expected = Variable('x', [0, 1, 2, 3], v.attrs) + self.assertVariableIdentical(actual, expected) + + def test_unstack_errors(self): + v = Variable('z', [0, 1, 2, 3]) + with self.assertRaisesRegexp(ValueError, 'invalid existing dim'): + v.unstack(foo={'x': 4}) + with self.assertRaisesRegexp(ValueError, 
'cannot create a new dim'): + v.stack(z=('z',)) + with self.assertRaisesRegexp(ValueError, 'the product of the new dim'): + v.unstack(z={'x': 5}) + + def test_unstack_2d(self): + v = Variable(['x', 'y'], [[0, 1], [2, 3]]) + actual = v.unstack(y={'z': 2}) + expected = Variable(['x', 'z'], v.data) + self.assertVariableIdentical(actual, expected) + + actual = v.unstack(x={'z': 2}) + expected = Variable(['y', 'z'], v.data.T) + self.assertVariableIdentical(actual, expected) + + def test_stack_unstack_consistency(self): + v = Variable(['x', 'y'], [[0, 1], [2, 3]]) + actual = (v.stack(z=('x', 'y')) + .unstack(z=OrderedDict([('x', 2), ('y', 2)]))) + self.assertVariableIdentical(actual, v) + def test_broadcasting_math(self): x = np.random.randn(2, 3) v = Variable(['a', 'b'], x) From 6b6d82eb75563bbc4a2785e08c8898a93fe7bc51 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 20:41:14 -0800 Subject: [PATCH 04/13] Add Dataset.stack and Dataset.unstack --- xray/core/dataset.py | 100 ++++++++++++++++++++++++++++++++++++++ xray/core/variable.py | 2 +- xray/test/test_dataset.py | 51 +++++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index c974626d3f5..385bedf842e 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1308,6 +1308,106 @@ def swap_dims(self, dims_dict, inplace=False): return self._replace_vars_and_dims(variables, coord_names, inplace=inplace) + def _stack_once(self, dims, new_dim): + variables = OrderedDict() + for name, var in self.variables.items(): + if name not in dims: + if any(d in var.dims for d in dims): + add_dims = [d for d in dims if d not in var.dims] + vdims = list(var.dims) + add_dims + shape = [self.dims[d] for d in vdims] + exp_var = var.expand_dims(vdims, shape) + stacked_var = exp_var.stack(**{new_dim: dims}) + variables[name] = stacked_var + else: + variables[name] = var.copy(deep=False) + + idx = pd.MultiIndex.from_product([self.indexes[d] for d 
in dims], + names=dims) + variables[new_dim] = Coordinate(new_dim, idx) + + coord_names = set(self._coord_names) - set(dims) | set([new_dim]) + + return self._replace_vars_and_dims(variables, coord_names) + + def stack(self, **dimensions): + """ + Stack any number of existing dimensions into a single new dimension. + + New dimensions will be added at the end, and the corresponding + coordinate variables will be combined into a MultiIndex. + + Parameters + ---------- + **dimensions : keyword arguments of the form new_name=(dim1, dim2, ...) + Names of new dimensions, and the existing dimensions that they + replace. + + Returns + ------- + stacked : Dataset + Dataset with stacked data. + + See also + -------- + Dataset.unstack + """ + result = self + for new_dim, dims in dimensions.items(): + result = result._stack_once(dims, new_dim) + return result + + def unstack(self, dim): + """ + Unstack an existing dimension corresponding to a MultiIndex into + multiple new dimensions. + + New dimensions will be added at the end. + + Parameters + ---------- + dim : str + Name of the existing dimension to unstack. + + Returns + ------- + unstacked : Dataset + Dataset with unstacked data. 
+ + See also + -------- + Dataset.stack + """ + if dim not in self.dims: + raise ValueError('invalid dimension: %s' % dim) + + index = self.indexes[dim] + if not isinstance(index, pd.MultiIndex): + raise ValueError('cannot unstack a dimension that does not have ' + 'a MultiIndex') + + new_dim_names = index.names + if any(name is None for name in new_dim_names): + raise ValueError('cannot unstack dimension with unnamed levels') + + new_dim_sizes = [lev.size for lev in index.levels] + + variables = OrderedDict() + for name, var in self.variables.items(): + if name != dim: + if dim in var.dims: + new_dims = OrderedDict(zip(new_dim_names, new_dim_sizes)) + variables[name] = var.unstack(**{dim: new_dims}) + else: + variables[name] = var + + for name, lev in zip(new_dim_names, index.levels): + variables[name] = Coordinate(name, lev) + + coord_names = set(self._coord_names) - set([dim]) | set(new_dim_names) + + return self._replace_vars_and_dims(variables, coord_names) + def update(self, other, inplace=True): """Update this dataset's variables with those from another dataset. diff --git a/xray/core/variable.py b/xray/core/variable.py index dee02e1ba05..c1c69efe9e6 100644 --- a/xray/core/variable.py +++ b/xray/core/variable.py @@ -799,7 +799,7 @@ def _unstack_once(self, dims, old_dim): def unstack(self, **dimensions): """ - Unstack an existing dimensions into multiple new dimensions. + Unstack an existing dimension into multiple new dimensions. New dimensions will be added at the end, and the order of the data along each new dimension will be in contiguous (C) order. 
diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 6443a774b2d..505be42a8a7 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -1133,6 +1133,57 @@ def test_swap_dims(self): with self.assertRaisesRegexp(ValueError, 'replacement dimension'): original.swap_dims({'x': 'z'}) + def test_stack(self): + ds = Dataset({'a': ('x', [0, 1]), + 'b': (('x', 'y'), [[0, 1], [2, 3]]), + 'y': ['a', 'b']}) + + exp_index = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], + names=['x', 'y']) + expected = Dataset({'a': ('z', [0, 0, 1, 1]), + 'b': ('z', [0, 1, 2, 3]), + 'z': exp_index}) + actual = ds.stack(z=['x', 'y']) + self.assertDatasetIdentical(expected, actual) + + exp_index = pd.MultiIndex.from_product([['a', 'b'], [0, 1]], + names=['y', 'x']) + expected = Dataset({'a': ('z', [0, 1, 0, 1]), + 'b': ('z', [0, 2, 1, 3]), + 'z': exp_index}) + actual = ds.stack(z=['y', 'x']) + self.assertDatasetIdentical(expected, actual) + + def test_unstack(self): + index = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], + names=['x', 'y']) + ds = Dataset({'b': ('z', [0, 1, 2, 3]), 'z': index}) + expected = Dataset({'b': (('x', 'y'), [[0, 1], [2, 3]]), + 'y': ['a', 'b']}) + actual = ds.unstack('z') + self.assertDatasetIdentical(actual, expected) + + def test_unstack_errors(self): + ds = Dataset({'x': [1, 2, 3]}) + with self.assertRaisesRegexp(ValueError, 'invalid dimension'): + ds.unstack('foo') + with self.assertRaisesRegexp(ValueError, 'does not have a MultiIndex'): + ds.unstack('x') + + ds2 = Dataset({'x': pd.Index([(0, 1)])}) + with self.assertRaisesRegexp(ValueError, 'unnamed levels'): + ds2.unstack('x') + + def test_stack_unstack(self): + ds = Dataset({'a': ('x', [0, 1]), + 'b': (('x', 'y'), [[0, 1], [2, 3]]), + 'y': ['a', 'b']}) + actual = ds.stack(z=['x', 'y']).unstack('z') + assert actual.broadcast_equals(ds) + + actual = ds[['b']].stack(z=['x', 'y']).unstack('z') + assert actual.identical(ds[['b']]) + def test_update(self): data = 
create_test_data(seed=0) expected = data.copy() From d8ce68eba59feecaa226b6dd6172f084c3cb5c23 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 20:46:27 -0800 Subject: [PATCH 05/13] Add DataArray.stack and .unstack --- xray/core/dataarray.py | 49 +++++++++++++++++++++++++++++++++++++ xray/test/test_dataarray.py | 5 ++++ 2 files changed, 54 insertions(+) diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index f78663842bb..c701a8f83a2 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -763,6 +763,55 @@ def swap_dims(self, dims_dict): ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) + def stack(self, **dimensions): + """ + Stack any number of existing dimensions into a single new dimension. + + New dimensions will be added at the end, and the corresponding + coordinate variables will be combined into a MultiIndex. + + Parameters + ---------- + **dimensions : keyword arguments of the form new_name=(dim1, dim2, ...) + Names of new dimensions, and the existing dimensions that they + replace. + + Returns + ------- + stacked : DataArray + DataArray with stacked data. + + See also + -------- + DataArray.unstack + """ + ds = self._to_temp_dataset().stack(**dimensions) + return self._from_temp_dataset(ds) + + def unstack(self, dim): + """ + Unstack an existing dimension corresponding to a MultiIndex into + multiple new dimensions. + + New dimensions will be added at the end. + + Parameters + ---------- + dim : str + Name of the existing dimension to unstack. + + Returns + ------- + unstacked : DataArray + Array with unstacked data. + + See also + -------- + DataArray.stack + """ + ds = self._to_temp_dataset().unstack(dim) + return self._from_temp_dataset(ds) + def transpose(self, *dims): """Return a new DataArray object with transposed dimensions. 
diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 1e2a9daf386..8246afe9b79 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -863,6 +863,11 @@ def test_dataset_math(self): actual['tmin'] -= obs['tmin'] self.assertDatasetIdentical(actual, expected) + def test_stack_unstack(self): + orig = DataArray([[0, 1], [2, 3]], dims=['x', 'y'], attrs={'foo': 2}) + actual = orig.stack(z=['x', 'y']).unstack('z') + self.assertDataArrayIdentical(orig, actual) + def test_transpose(self): self.assertVariableEqual(self.dv.variable.transpose(), self.dv.transpose()) From 66cb58029993be78b3895ec0bd2f30d05744de13 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 20:55:00 -0800 Subject: [PATCH 06/13] add test for lazy stacking with dask --- xray/test/test_dask.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/xray/test/test_dask.py b/xray/test/test_dask.py index 7e245b233ba..215cfaa13f8 100644 --- a/xray/test/test_dask.py +++ b/xray/test/test_dask.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from xray import Variable, DataArray, Dataset, concat import xray.ufuncs as xu @@ -288,3 +289,14 @@ def counting_get(*args, **kwargs): with dask.set_options(get=counting_get): ds.load() self.assertEqual(count[0], 1) + + def test_stack(self): + data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4)) + arr = DataArray(data, dims=('w', 'x', 'y')) + stacked = arr.stack(z=('x', 'y')) + z = pd.MultiIndex.from_product([np.arange(3), np.arange(4)], + names=['x', 'y']) + expected = DataArray(data.reshape(2, -1), {'w': [0, 1], 'z': z}, + dims=['w', 'z']) + assert stacked.data.chunks == expected.data.chunks + self.assertLazyAndIdentical(expected, stacked) From 93644490c3841c2e0fd7ae9568b8c04fd187acfd Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 21:13:47 -0800 Subject: [PATCH 07/13] Add an example to DataArray.stack --- xray/core/dataarray.py | 18 ++++++++++++++++++ 1 file changed, 18 
insertions(+) diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index c701a8f83a2..95380f80811 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -781,6 +781,24 @@ def stack(self, **dimensions): stacked : DataArray DataArray with stacked data. + Example + ------- + + >>> arr = DataArray(np.arange(6).reshape(2, 3), + ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + >>> arr + + array([[0, 1, 2], + [3, 4, 5]]) + Coordinates: + * x (x) |S1 'a' 'b' + * y (y) int64 0 1 2 + >>> stacked = arr.stack(z=('x', 'y')) + >>> stacked.indexes['z'] + MultiIndex(levels=[[u'a', u'b'], [0, 1, 2]], + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + names=[u'x', u'y']) + See also -------- DataArray.unstack From da94b4f7eeb732f801d01145d3959c66b1142a22 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 21:22:29 -0800 Subject: [PATCH 08/13] reindex in .unstack for pandas consistency --- xray/core/dataset.py | 5 ++++- xray/test/test_dataarray.py | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 385bedf842e..90f89f14fee 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1386,6 +1386,9 @@ def unstack(self, dim): raise ValueError('cannot unstack a dimension that does not have ' 'a MultiIndex') + full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) + obj = self.reindex(copy=False, **{dim: full_idx}) + new_dim_names = index.names if any(name is None for name in new_dim_names): raise ValueError('cannot unstack dimension with unnamed levels') @@ -1393,7 +1396,7 @@ def unstack(self, dim): new_dim_sizes = [lev.size for lev in index.levels] variables = OrderedDict() - for name, var in self.variables.items(): + for name, var in obj.variables.items(): if name != dim: if dim in var.dims: new_dims = OrderedDict(zip(new_dim_names, new_dim_sizes)) diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 8246afe9b79..7c71b9ffafb 
100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -868,6 +868,15 @@ def test_stack_unstack(self): actual = orig.stack(z=['x', 'y']).unstack('z') self.assertDataArrayIdentical(orig, actual) + def test_unstack_pandas_consistency(self): + df = pd.DataFrame({'foo': range(3), + 'x': ['a', 'b', 'b'], + 'y': [0, 0, 1]}) + s = df.set_index(['x', 'y'])['foo'] + expected = DataArray(s.unstack(), name='foo') + actual = DataArray(s, dims='z').unstack('z') + self.assertDataArrayIdentical(expected, actual) + def test_transpose(self): self.assertVariableEqual(self.dv.variable.transpose(), self.dv.transpose()) From 9ad477321c47d70545b026ffa46f795b97b98bd6 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 21:40:13 -0800 Subject: [PATCH 09/13] what's new updates --- doc/whats-new.rst | 83 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a8e97e102cd..371f1cd1e34 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -61,26 +61,58 @@ Breaking changes .. _this stackoverflow report: http://stackoverflow.com/questions/33158558/python-xray-extract-first-and-last-time-value-within-each-month-of-a-timeseries -Bug fixes -~~~~~~~~~ +Enhancements +~~~~~~~~~~~~ -- Fixes for several issues found on ``DataArray`` objects with the same name - as one of their coordinates (see :ref:`v0.7.0.breaking` for more details). -- Attempting to assign a ``Dataset`` or ``DataArray`` variable/attribute using - attribute-style syntax (e.g., ``ds.foo = 42``) now raises an error rather - than silently failing (:issue:`656`, :issue:`714`). 
+- Basic support for :py:class:`~pandas.MultiIndex` coordinates on xray objects, including + indexing, :py:meth:`~DataArray.stack` and :py:meth:`~DataArray.unstack`: -- ``DataArray.to_masked_array`` always returns masked array with mask being an array -(not a scalar value) (:issue:`684`) -- You can now pass pandas objects with non-numpy dtypes (e.g., ``categorical`` - or ``datetime64`` with a timezone) into xray without an error - (:issue:`716`). + .. ipython:: + :verbatim: -v0.6.2 (unreleased) -------------------- + In [7]: df = pd.DataFrame({'foo': range(3), + ...: 'x': ['a', 'b', 'b'], + ...: 'y': [0, 0, 1]}) -Enhancements -~~~~~~~~~~~~ + In [8]: s = df.set_index(['x', 'y'])['foo'] + + In [12]: arr = xray.DataArray(s, dims='z') + + In [13]: arr + Out[13]: + + array([0, 1, 2]) + Coordinates: + * z (z) object ('a', 0) ('b', 0) ('b', 1) + + In [19]: arr.indexes['z'] + Out[19]: + MultiIndex(levels=[[u'a', u'b'], [0, 1]], + labels=[[0, 1, 1], [0, 0, 1]], + names=[u'x', u'y']) + + In [14]: arr.unstack('z') + Out[14]: + + array([[ 0., nan], + [ 1., 2.]]) + Coordinates: + * x (x) object 'a' 'b' + * y (y) int64 0 1 + + In [26]: arr.unstack('z').stack(z=('x', 'y')) + Out[26]: + + array([ 0., nan, 1., 2.]) + Coordinates: + * z (z) object ('a', 0) ('a', 1) ('b', 0) ('b', 1) + + .. warning:: + + xray's MultiIndex support is still experimental, and we have a long to- + do list of desired additions (:issue:`719`). For example, you cannot yet + save a MultiIndex to a netCDF file. User contributions in this area + would be greatly appreciate :). - Support for reading GRIB, HDF4 and other file formats via PyNIO_. See :ref:`io.pynio` for more details. @@ -120,8 +152,27 @@ Enhancements Bug fixes ~~~~~~~~~ +- Fixes for several issues found on ``DataArray`` objects with the same name + as one of their coordinates (see :ref:`v0.7.0.breaking` for more details). 
+ +- ``DataArray.to_masked_array`` always returns masked array with mask being an array +(not a scalar value) (:issue:`684`) + - Allows for (imperfect) repr of Coords when underlying index is PeriodIndex (:issue:`645`). +- Fixes for several issues found on ``DataArray`` objects with the same name + as one of their coordinates (see :ref:`v0.7.0.breaking` for more details). +- Attempting to assign a ``Dataset`` or ``DataArray`` variable/attribute using + attribute-style syntax (e.g., ``ds.foo = 42``) now raises an error rather + than silently failing (:issue:`656`, :issue:`714`). + +- ``DataArray.to_masked_array`` always returns masked array with mask being an array +(not a scalar value) (:issue:`684`) + +- You can now pass pandas objects with non-numpy dtypes (e.g., ``categorical`` + or ``datetime64`` with a timezone) into xray without an error + (:issue:`716`). + v0.6.1 (21 October 2015) ------------------------ From 8e3f188dba39095a57e6700fdc52e3d5189677f4 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 3 Jan 2016 22:23:21 -0800 Subject: [PATCH 10/13] Fix pandas < v0.15.2 and GH700 --- xray/backends/api.py | 5 ++--- xray/core/indexing.py | 9 +++++++-- xray/test/test_dataarray.py | 9 +++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/xray/backends/api.py b/xray/backends/api.py index 9bf4cf05b98..4ce4d1405d4 100644 --- a/xray/backends/api.py +++ b/xray/backends/api.py @@ -170,9 +170,8 @@ def maybe_decode_store(store, lock=False): else: file_arg = filename_or_obj token = tokenize(file_arg, group, decode_cf, mask_and_scale, - decode_times, concat_characters, - decode_coords, engine, chunks, lock, - drop_variables) + decode_times, concat_characters, decode_coords, + engine, chunks, drop_variables) name_prefix = '%s:%s/' % (filename_or_obj, group or '') ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token, lock=lock) diff --git a/xray/core/indexing.py b/xray/core/indexing.py index d03d110b65c..2dffff2910c 100644 --- 
a/xray/core/indexing.py +++ b/xray/core/indexing.py @@ -386,10 +386,10 @@ class PandasIndexAdapter(utils.NDArrayMixin): def __init__(self, array, dtype=None): self.array = utils.safe_cast_to_index(array) if dtype is None: - # if a PeriodIndex, force an object dtype if isinstance(array, pd.PeriodIndex): dtype = np.dtype('O') elif hasattr(array, 'categories'): + # category isn't a real numpy dtype dtype = array.categories.dtype elif not utils.is_valid_numpy_dtype(array.dtype): dtype = np.dtype('O') @@ -409,7 +409,12 @@ def __array__(self, dtype=None): with suppress(AttributeError): # this might not be public API array = array.asobject - return np.asarray(array, dtype) + return np.asarray(array.values, dtype=dtype) + + @property + def shape(self): + # .shape is broken on pandas prior to v0.15.2 + return (len(self.array),) def __getitem__(self, key): if isinstance(key, tuple) and len(key) == 1: diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 7c71b9ffafb..c1135601ef8 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -1413,6 +1413,15 @@ def test_to_and_from_series(self): self.assertDataArrayIdentical(expected_da, DataArray.from_series(actual)) + def test_series_categorical_index(self): + # regression test for GH700 + if not hasattr(pd, 'CategoricalIndex'): + raise unittest.SkipTest('requires pandas with CategoricalIndex') + + s = pd.Series(range(5), index=pd.CategoricalIndex(list('aabbc'))) + arr = DataArray(s) + assert "'a'" in repr(arr) # should not error + def test_to_masked_array(self): rs = np.random.RandomState(44) x = rs.random_sample(size=(10, 20)) From 0b728957a2c3621686f67d5b19958f596041d43e Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 17 Jan 2016 13:11:44 -0800 Subject: [PATCH 11/13] Documentation for reshaping data --- doc/api.rst | 44 +++++++++----- doc/computation.rst | 16 ++--- doc/data-structures.rst | 46 +-------------- doc/index.rst | 1 + doc/reshaping.rst | 125 
++++++++++++++++++++++++++++++++++++++++ doc/whats-new.rst | 26 ++++----- 6 files changed, 178 insertions(+), 80 deletions(-) create mode 100644 doc/reshaping.rst diff --git a/doc/api.rst b/doc/api.rst index f0125792a2c..03af7bb46a8 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -5,7 +5,7 @@ API reference ############# This page provides an auto-generated summary of xray's API. For more details -and examples, refer to the relevant chapter in the main part of the +and examples, refer to the relevant chapters in the main part of the documentation. Top-level functions @@ -110,10 +110,7 @@ Computation Dataset.reduce Dataset.groupby Dataset.resample - Dataset.transpose Dataset.diff - Dataset.shift - Dataset.roll **Aggregation**: :py:attr:`~Dataset.all` @@ -155,6 +152,18 @@ Computation :py:attr:`~core.groupby.DatasetGroupBy.fillna` :py:attr:`~core.groupby.DatasetGroupBy.where` +Reshaping and reorganizing +-------------------------- + +.. autosummary:: + :toctree: generated/ + + Dataset.transpose + Dataset.stack + Dataset.unstack + Dataset.shift + Dataset.roll + DataArray ========= @@ -218,6 +227,16 @@ Indexing DataArray.reindex DataArray.reindex_like +Comparisons +----------- + +.. autosummary:: + :toctree: generated/ + + DataArray.equals + DataArray.identical + DataArray.broadcast_equals + Computation ----------- @@ -227,11 +246,8 @@ Computation DataArray.reduce DataArray.groupby DataArray.resample - DataArray.transpose DataArray.get_axis_num DataArray.diff - DataArray.shift - DataArray.roll **Aggregation**: :py:attr:`~DataArray.all` @@ -273,16 +289,18 @@ Computation :py:attr:`~core.groupby.DataArrayGroupBy.fillna` :py:attr:`~core.groupby.DataArrayGroupBy.where` -Comparisons ------------ + +Reshaping and reorganizing +-------------------------- .. autosummary:: :toctree: generated/ - DataArray.equals - DataArray.identical - DataArray.broadcast_equals - + DataArray.transpose + DataArray.stack + DataArray.unstack + DataArray.shift + DataArray.roll .. 
_api.ufuncs: diff --git a/doc/computation.rst b/doc/computation.rst index 69002ea5b17..6f1ed8bf79b 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -140,6 +140,13 @@ This means, for example, that you always subtract an array from its transpose: c - c.T +You can explicitly broadcast xray data structures by using the +:py:func:`~xray.broadcast` function: + + a2, b2 = xray.broadcast(a, b) + a2 + b2 + .. _math automatic alignment: Automatic alignment @@ -223,13 +230,6 @@ Datasets support most of the same methods found on data arrays: ds.mean(dim='x') abs(ds) -:py:meth:`~xray.Dataset.transpose` can also be used to reorder dimensions on -all variables: - -.. ipython:: python - - ds.transpose('y', 'x') - Unfortunately, a limitation of the current version of numpy means that we cannot override ufuncs for datasets, because datasets cannot be written as a single array [1]_. :py:meth:`~xray.Dataset.apply` works around this @@ -256,5 +256,5 @@ Arithmetic between two datasets matches data variables of the same name: Similarly to index based alignment, the result has the intersection of all matching variables, and ``ValueError`` is raised if the result would be empty. -.. [1] When numpy 1.10 is released, we should be able to override ufuncs for +.. [1] When numpy 1.12 is released, we should be able to override ufuncs for datasets by making use of ``__numpy_ufunc__``. diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 21f73cd41c8..cd4553e0ef4 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -436,8 +436,8 @@ dataset variables: ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) -Finally, you can use :py:meth:`~xray.Dataset.swap_dims` to swap dimension and -non-dimension variables: +The related :py:meth:`~xray.Dataset.swap_dims` method allows you to swap +dimension and non-dimension variables: .. 
ipython:: python @@ -535,48 +535,6 @@ dimension and whose the values are ``Index`` objects: ds.indexes -Converting datasets and arrays ------------------------------- - -To convert from a Dataset to a DataArray, use :py:meth:`~xray.Dataset.to_array`: - -.. ipython:: python - - arr = ds.to_array() - arr - -This method broadcasts all data variables in the dataset against each other, -then concatenates them along a new dimension into a new array while preserving -coordinates. - -To convert back from a DataArray to a Dataset, use -:py:meth:`~xray.DataArray.to_dataset`: - -.. ipython:: python - - arr.to_dataset(dim='variable') - -The broadcasting behavior of ``to_array`` means that the resulting array -includes the union of data variable dimensions: - -.. ipython:: python - - ds2 = xray.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) - - # the input dataset has 4 elements - ds2 - - # the resulting array has 6 elements - ds2.to_array() - -Otherwise, the result could not be represented as an orthogonal array. - -If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray will be converted into a Dataset of one variable: - -.. ipython:: python - - arr.to_dataset(name='combined') - .. [1] Latitude and longitude are 2D arrays because the dataset uses `projected coordinates`__. ``reference_time`` refers to the reference time diff --git a/doc/index.rst b/doc/index.rst index aea6d8f8f0c..0be9822be82 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -36,6 +36,7 @@ Documentation indexing computation groupby + reshaping combining time-series pandas diff --git a/doc/reshaping.rst b/doc/reshaping.rst new file mode 100644 index 00000000000..104d70a5d37 --- /dev/null +++ b/doc/reshaping.rst @@ -0,0 +1,125 @@ +.. _reshape: + +############################### +Reshaping and reorganizing data +############################### + +These methods allow you to reorganize your data. + +.. 
ipython:: python + :suppress: + + import numpy as np + import pandas as pd + import xray + np.random.seed(123456) + +Reordering dimensions +--------------------- + +To reorder dimensions on a :py:class:`~xray.DataArray` or across all variables +on a :py:class:`~xray.Dataset`, use :py:meth:`xray.DataArray.transpose` or the +``.T`` property: + +.. ipython:: python + + ds = xray.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) + ds.transpose('y', 'z', 'x') + ds.T + +Converting between datasets and arrays +-------------------------------------- + +To convert from a Dataset to a DataArray, use :py:meth:`~xray.Dataset.to_array`: + +.. ipython:: python + + arr = ds.to_array() + arr + +This method broadcasts all data variables in the dataset against each other, +then concatenates them along a new dimension into a new array while preserving +coordinates. + +To convert back from a DataArray to a Dataset, use +:py:meth:`~xray.DataArray.to_dataset`: + +.. ipython:: python + + arr.to_dataset(dim='variable') + +The broadcasting behavior of ``to_array`` means that the resulting array +includes the union of data variable dimensions: + +.. ipython:: python + + ds2 = xray.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + + # the input dataset has 4 elements + ds2 + + # the resulting array has 6 elements + ds2.to_array() + +Otherwise, the result could not be represented as an orthogonal array. + +If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray will be converted into a Dataset of one variable: + +.. ipython:: python + + arr.to_dataset(name='combined') + +.. _reshape.stack: + +Stack and unstack +----------------- + +As part of xray's nascent support for :py:class:`pandas.MultiIndex`, we have +implemented :py:meth:`~xray.DataArray.stack` and +:py:meth:`~xray.DataArray.unstack` methods, for combining or splitting dimensions: + +.. 
ipython:: python + + array = xray.DataArray(np.random.randn(2, 3), + coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + stacked = array.stack(z=('x', 'y')) + stacked + stacked.unstack('z') + +These methods are modeled on the :py:class:`pandas.DataFrame` methods of the +same name, although in xray they always create new dimensions rather than +adding to the existing index or columns. + +Like :py:meth:`DataFrame.unstack`, xray's ``unstack`` always succeeds, even +if the multi-index being unstacked does not contain all possible levels. Missing +levels are filled in with ``NaN`` in the resulting object: + +.. ipython:: python + + stacked2 = stacked[::2] + stacked2 + stacked2.unstack('z') + +However, xray's ``stack`` has an important difference from pandas: unlike +pandas, it does not automatically drop missing values. Compare: + +.. ipython:: python + + array = xray.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) + array.stack(z=('x', 'y')) + array.to_pandas().stack() + +We departed from pandas's behavior here because predictable shapes for new +array dimensions are necessary for :ref:`dask`. + +Shift and roll +-------------- + +To adjust coordinate labels, you can use the :py:meth:`~xray.Dataset.shift` and +:py:meth:`~xray.Dataset.roll` methods: + +.. ipython:: python + + array = xray.DataArray([1, 2, 3, 4], dims='x') + array.shift(x=2) + array.roll(x=2) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 371f1cd1e34..8733b5fd383 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -107,12 +107,15 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('a', 1) ('b', 0) ('b', 1) + See :ref:`reshape.stack` for more details. + .. warning:: xray's MultiIndex support is still experimental, and we have a long to- - do list of desired additions (:issue:`719`). For example, you cannot yet - save a MultiIndex to a netCDF file. User contributions in this area - would be greatly appreciate :). 
+ do list of desired additions (:issue:`719`), including better display of + multi-index levels when printing a ``Dataset``, and support for saving + datasets with a MultiIndex to a netCDF file. User contributions in this + area would be greatly appreciated. - Support for reading GRIB, HDF4 and other file formats via PyNIO_. See :ref:`io.pynio` for more details. @@ -132,10 +135,10 @@ Enhancements Notice that ``shift`` moves data independently of coordinates, but ``roll`` moves both data and coordinates. -- Assigning a ``pandas`` object to the variable of ``Dataset`` directly is now permitted. Its - index names correspond to the ``dims`` of the ``Dataset``, and its data is aligned +- Assigning a ``pandas`` object directly as a ``Dataset`` variable is now permitted. Its + index names correspond to the ``dims`` of the ``Dataset``, and its data is aligned. - Passing a :py:class:`pandas.DataFrame` or :py:class:`pandas.Panel` to a Dataset constructor - is now permitted + is now permitted. - New function :py:func:`~xray.broadcast` for explicitly broadcasting ``DataArray`` and ``Dataset`` objects against each other. For example: @@ -154,21 +157,14 @@ Bug fixes - Fixes for several issues found on ``DataArray`` objects with the same name as one of their coordinates (see :ref:`v0.7.0.breaking` for more details). - -- ``DataArray.to_masked_array`` always returns masked array with mask being an array -(not a scalar value) (:issue:`684`) - +- ``DataArray.to_masked_array`` always returns masked array with mask being an + array (not a scalar value) (:issue:`684`) - Allows for (imperfect) repr of Coords when underlying index is PeriodIndex (:issue:`645`). - - Fixes for several issues found on ``DataArray`` objects with the same name as one of their coordinates (see :ref:`v0.7.0.breaking` for more details). 
- Attempting to assign a ``Dataset`` or ``DataArray`` variable/attribute using attribute-style syntax (e.g., ``ds.foo = 42``) now raises an error rather than silently failing (:issue:`656`, :issue:`714`). - -- ``DataArray.to_masked_array`` always returns masked array with mask being an array -(not a scalar value) (:issue:`684`) - - You can now pass pandas objects with non-numpy dtypes (e.g., ``categorical`` or ``datetime64`` with a timezone) into xray without an error (:issue:`716`). From 30400b47efffe1591180b8c5ceffa2fea3f80a61 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 17 Jan 2016 13:30:59 -0800 Subject: [PATCH 12/13] what's new updates for v0.7.0 --- doc/whats-new.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8733b5fd383..efa1973a12b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -12,6 +12,31 @@ What's New v0.7.0 (unreleased) ------------------- +This major release includes a redesign of :py:class:`~xray.DataArray` +internals, as well as new methods for reshaping, rolling and shifting +data. It includes preliminary support for :py:class:`pandas.MultiIndex`, +as well as a number of other features and bug fixes, several of which +offer improved compatibility with pandas. + +New name +~~~~~~~~ + +The project formerly known as "xray" is now "xarray"! This avoids a namespace +conflict with the entirety of x-ray science. Renaming our project seemed like +the right thing to do, especially because some scientists who work with actual +x-rays are interested in using this project in their work. Thanks for your +understanding and patience in this transition. You can now find our +documentation and code repository at new URLs: + +- http://xarray.pydata.org +- http://github.com/pydata/xarray/ + +To ease the transition, we have simultaneously released v0.7.0 of both +``xray`` and ``xarray`` on the Python Package Index. 
These packages are +identical, except the former issues a deprecation warning when imported. This +will be the last xray release. We recommend switching your imports going +forward to ``import xarray as xr``. + .. _v0.7.0.breaking: Breaking changes From e03405322039fd92b6af0f28fa41da1b7bf581f3 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 17 Jan 2016 16:00:09 -0800 Subject: [PATCH 13/13] add acknowledgments for v0.7 --- doc/whats-new.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index efa1973a12b..84a537d19af 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -194,6 +194,19 @@ Bug fixes or ``datetime64`` with a timezone) into xray without an error (:issue:`716`). +Acknowledgments +~~~~~~~~~~~~~~~ + +The following individuals contributed to this release: + +- Antony Lee +- Fabien Maussion +- Joe Hamman +- Maximilian Roos +- Stephan Hoyer +- Takeshi Kanmae +- femtotrader + v0.6.1 (21 October 2015) ------------------------