From 918016b68bbd49ff7ccce081c48408c305940e40 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 2 Mar 2015 12:51:40 -0800 Subject: [PATCH] Switch the name of datetime components from 'time.month' to 'month' Fixes GH345 This lets you write things like: counts = time.groupby('time.month').count() counts.sel(month=2) instead of the previously valid counts.sel(**{'time.month': 2}) which is much more awkward CC jhamman --- doc/data-structures.rst | 8 ++++++++ doc/whats-new.rst | 35 ++++++++++++++++++++++++----------- xray/core/dataarray.py | 6 ++++++ xray/core/dataset.py | 21 +++++++++++---------- xray/test/test_dataset.py | 9 +++++---- 5 files changed, 54 insertions(+), 25 deletions(-) diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 91a830be410..374fe53e916 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -179,6 +179,14 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components foo['time.month'] foo['time.dayofyear'] +xray adds ``'season'`` to the list of datetime components supported by pandas: + +.. ipython:: python + + foo['time.season'] + +The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by +the first letters of the corresponding months. Dataset ------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b98c7abebb8..38db78fd472 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,14 +35,14 @@ Breaking changes rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) lhs + rhs - For :ref:`data construction and merging`, we align based on the + :ref:`For dataset construction and merging`, we align based on the **union** of labels: .. ipython:: python xray.Dataset({'foo': lhs, 'bar': rhs}) - For :ref:`update and __setitem__`, we align based on the **original** + :ref:`For update and __setitem__`, we align based on the **original** object: .. 
ipython:: python @@ -76,15 +76,19 @@ Breaking changes This functionality can be controlled through the ``compat`` option, which has also been added to the :py:class:`~xray.Dataset` constructor. -- We have updated our use of the terms of "coordinates" and "variables". What - were known in previous versions of xray as "coordinates" and "variables" are - now referred to throughout the documentation as "coordinate variables" and - "data variables". This brings xray in closer alignment to `CF Conventions`_. - The only visible change besides the documentation is that ``Dataset.vars`` - has been renamed ``Dataset.data_vars``. -- You will need to update your code if you have been ignoring deprecation - warnings: methods and attributes that were deprecated in xray v0.3 or earlier - (e.g., ``dimensions``, ``attributes```) have gone away. +- Datetime shortcuts such as ``'time.month'`` now return a ``DataArray`` with + the name ``'month'``, not ``'time.month'`` (:issue:`345`). This makes it + easier to index the resulting arrays when they are used with ``groupby``: + + .. ipython:: python + + time = xray.DataArray(pd.date_range('2000-01-01', periods=365), + dims='time', name='time') + counts = time.groupby('time.month').count() + counts.sel(month=2) + + Previously, you would need to use something like + ``counts.sel(**{'time.month': 2})``, which is much more awkward. - The ``season`` datetime shortcut now returns an array of string labels such `'DJF'`: @@ -94,6 +98,15 @@ Breaking changes ds['t.season'] Previously, it returned numbered seasons 1 through 4. +- We have updated our use of the terms of "coordinates" and "variables". What + were known in previous versions of xray as "coordinates" and "variables" are + now referred to throughout the documentation as "coordinate variables" and + "data variables". This brings xray in closer alignment to `CF Conventions`_.
+ The only visible change besides the documentation is that ``Dataset.vars`` + has been renamed ``Dataset.data_vars``. +- You will need to update your code if you have been ignoring deprecation + warnings: methods and attributes that were deprecated in xray v0.3 or earlier + (e.g., ``dimensions``, ``attributes``) have gone away. .. _bottleneck: https://github.com/kwgoodman/bottleneck diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index ae25235499d..69213f4e298 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -195,6 +195,12 @@ def _new_from_dataset(cls, dataset, name): """ obj = object.__new__(cls) obj._dataset = dataset._copy_listed([name], keep_attrs=False) + if name not in obj._dataset: + # handle virtual variables + try: + _, name = name.split('.', 1) + except Exception: + raise KeyError(name) obj._name = name if name not in dataset._dims: obj._dataset._coord_names.discard(name) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 75affd31eff..467683a732f 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -137,12 +137,12 @@ def _get_virtual_variable(variables, key): if not isinstance(key, basestring): raise KeyError(key) - split_key = key.split('.') + split_key = key.split('.', 1) if len(split_key) != 2: raise KeyError(key) - ref_var_name, suffix = split_key - ref_var = variables[ref_var_name] + ref_name, var_name = split_key + ref_var = variables[ref_name] if ref_var.ndim == 1: date = ref_var.to_index() elif ref_var.ndim == 0: @@ -150,14 +150,14 @@ def _get_virtual_variable(variables, key): else: raise KeyError(key) - if suffix == 'season': + if var_name == 'season': # TODO: move 'season' into pandas itself seasons = np.array(['DJF', 'MAM', 'JJA', 'SON']) month = date.month data = seasons[(month // 3) % 4] else: - data = getattr(date, suffix) - return ref_var_name, variable.Variable(ref_var.dims, data) + data = getattr(date, var_name) + return ref_name, var_name, variable.Variable(ref_var.dims, data) def
_as_dataset_variable(name, var): @@ -624,10 +624,11 @@ def _copy_listed(self, names, keep_attrs=True): try: variables[name] = self._variables[name] except KeyError: - ref_name, var = _get_virtual_variable(self._variables, name) - variables[name] = var + ref_name, var_name, var = _get_virtual_variable( + self._variables, name) + variables[var_name] = var if ref_name in self._coord_names: - coord_names.add(name) + coord_names.add(var_name) needed_dims = set() for v in variables.values(): @@ -647,7 +648,7 @@ def __copy__(self): return self.copy(deep=False) def __deepcopy__(self, memo=None): - # memo does nothing but is required for compatability with + # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 2bb55c8fab8..6e914efd5ca 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -965,8 +965,9 @@ def test_getitem(self): def test_virtual_variables(self): # access virtual variables data = create_test_data() - self.assertVariableEqual(data['time.dayofyear'], - Variable('time', 1 + np.arange(20))) + expected = DataArray(1 + np.arange(20), coords=[data['time']], + dims='time', name='dayofyear') + self.assertDataArrayIdentical(expected, data['time.dayofyear']) self.assertArrayEqual(data['time.month'].values, data.variables['time'].to_index().month) self.assertArrayEqual(data['time.season'].values, 'DJF') @@ -975,7 +976,7 @@ def test_virtual_variables(self): self.assertArrayEqual(np.sin(data['time.dayofyear']), np.sin(1 + np.arange(20))) # ensure they become coordinates - expected = Dataset({}, {'time.dayofyear': data['time.dayofyear']}) + expected = Dataset({}, {'dayofyear': data['time.dayofyear']}) actual = data[['time.dayofyear']] self.assertDatasetEqual(expected, actual) # non-coordinate variables @@ -1190,7 +1191,7 @@ def test_groupby_math_virtual(self): grouped = ds.groupby('t.day') actual = grouped - grouped.mean() expected = 
Dataset({'x': ('t', [0, 0, 0])}, - {'t': ds['t'], 't.day': ds['t.day']}) + ds[['t', 't.day']]) self.assertDatasetIdentical(actual, expected) def test_groupby_nan(self):