From ff2fd49714dc46dbb30d03820cf94ce531ae29c6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 May 2019 09:58:09 -0600 Subject: [PATCH 01/15] Don't set attributes on bounds variables. Fixes #2921 1. Removes certain attributes from bounds variables on encode. 2. open_mfdataset: Sets encoding on variables based on encoding in first file. --- xarray/backends/api.py | 2 ++ xarray/conventions.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7c5040580fe..919dce14f4d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -724,6 +724,8 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, combined._file_obj = _MultiFileCloser(file_objs) combined.attrs = datasets[0].attrs + for v in combined: + combined[v].encoding = datasets[0][v].attrs return combined diff --git a/xarray/conventions.py b/xarray/conventions.py index 5f41639e890..ae3044fee7e 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -235,6 +235,7 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_default_fill_value(var) var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) + return var @@ -599,7 +600,7 @@ def cf_encoder(variables, attributes): as possible. This includes masking, scaling, character array handling, and CF-time encoding. - Decode a set of CF encoded variables and attributes. + Encode a set of CF encoded variables and attributes. See Also, decode_cf_variable @@ -619,6 +620,19 @@ def cf_encoder(variables, attributes): See also: encode_cf_variable """ + new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) for k, v in variables.items()) + + # Remove attrs from bounds variables before encoding + for var in new_vars.values(): + bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None + if bounds and bounds in new_vars: + # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries + for attr in ['units', 'standard_name', 'axis', 'positive', + 'calendar', 'long_name', 'leap_month', 'leap_year', + 'month_lengths']: + if attr in new_vars[bounds].attrs: + new_vars[bounds].attrs.pop(attr) + return new_vars, attributes From 302ab63f5c0bdbcfdf8e8709d2eebe8e906d3b9e Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 May 2019 10:01:36 -0600 Subject: [PATCH 02/15] remove whitespace stuff. --- xarray/conventions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index ae3044fee7e..94247d7b979 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -235,7 +235,6 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_default_fill_value(var) var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) - return var @@ -620,7 +619,6 @@ def cf_encoder(variables, attributes): See also: encode_cf_variable """ - new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) for k, v in variables.items()) From 931f9737aefcedf0920e01bdcfb94d054a81186e Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 May 2019 10:03:33 -0600 Subject: [PATCH 03/15] Make sure variable exists in first file before assigning encoding --- xarray/backends/api.py | 3 ++- xarray/conventions.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 919dce14f4d..f6e32214e64 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -725,7 +725,8 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, combined._file_obj = _MultiFileCloser(file_objs) combined.attrs = datasets[0].attrs for v in combined: - combined[v].encoding = datasets[0][v].attrs + if v in datasets[0]: + combined[v].encoding = datasets[0][v].encoding return combined diff --git a/xarray/conventions.py b/xarray/conventions.py index 94247d7b979..9319e778b46 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -623,6 +623,7 @@ def cf_encoder(variables, attributes): for k, v in variables.items()) # Remove attrs from bounds variables before encoding + # See issue #2921 for var in new_vars.values(): bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None if bounds and bounds in new_vars: From 5526fe40d68f83ff45ea75b026aacfac0b2b76b3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 May 2019 13:00:11 -0600 Subject: [PATCH 04/15] Make sure we iterate over coords too. --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f6e32214e64..6319c05d690 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -724,7 +724,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, combined._file_obj = _MultiFileCloser(file_objs) combined.attrs = datasets[0].attrs - for v in combined: + for v in combined.variables: if v in datasets[0]: combined[v].encoding = datasets[0][v].encoding return combined From 3889ba66c6db877d1e03e14c12231b9ddc8a3ff3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 May 2019 13:16:39 -0600 Subject: [PATCH 05/15] lint fix. --- xarray/conventions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 9319e778b46..0ebe07ad25b 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -627,7 +627,7 @@ def cf_encoder(variables, attributes): for var in new_vars.values(): bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None if bounds and bounds in new_vars: - # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries + # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries # noqa for attr in ['units', 'standard_name', 'axis', 'positive', 'calendar', 'long_name', 'leap_month', 'leap_year', 'month_lengths']: From 6f2bc0539ec7ce352cd60d9f4f34313a7d94de79 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 May 2019 13:18:39 -0600 Subject: [PATCH 06/15] docs/comment fixes. --- doc/whats-new.rst | 4 ++++ xarray/conventions.py | 23 ++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac1b5269bfa..0f8553ef8e1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,6 +33,10 @@ Enhancements Bug fixes ~~~~~~~~~ +- Don't set attributes on bounds variables when writing to netCDF. + :py:meth:`xr.open_mfdataset` sets variable encodings to that of variables + in first file.(:issue:`2436`, :issue:`2921`) + By `Deepak Cherian `_. - indexing with an empty list creates an object with zero-length axis (:issue:`2882`) By `Mayeul d'Avezac `_. - Return correct count for scalar datetime64 arrays (:issue:`2770`) diff --git a/xarray/conventions.py b/xarray/conventions.py index 0ebe07ad25b..345e45b7b30 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -491,8 +491,6 @@ def cf_decoder(variables, attributes, """ Decode a set of CF encoded variables and attributes. - See Also, decode_cf_variable - Parameters ---------- variables : dict @@ -514,6 +512,10 @@ def cf_decoder(variables, attributes, A dictionary mapping from variable name to xarray.Variable objects. decoded_attributes : dict A dictionary mapping from attribute name to values. + + See also + -------- + decode_cf_variable """ variables, attributes, _ = decode_cf_variables( variables, attributes, concat_characters, mask_and_scale, decode_times) @@ -594,14 +596,12 @@ def encode_dataset_coordinates(dataset): def cf_encoder(variables, attributes): """ - A function which takes a dicts of variables and attributes - and encodes them to conform to CF conventions as much - as possible. This includes masking, scaling, character - array handling, and CF-time encoding. - Encode a set of CF encoded variables and attributes. + Takes a dicts of variables and attributes and encodes them + to conform to CF conventions as much as possible. + This includes masking, scaling, character array handling, + and CF-time encoding. - See Also, decode_cf_variable Parameters ---------- @@ -617,13 +617,14 @@ def cf_encoder(variables, attributes): encoded_attributes : dict A dictionary mapping from attribute name to value - See also: encode_cf_variable + See also + -------- + decode_cf_variable, encode_cf_variable """ new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) for k, v in variables.items()) - # Remove attrs from bounds variables before encoding - # See issue #2921 + # Remove attrs from bounds variables (issue #2921) for var in new_vars.values(): bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None if bounds and bounds in new_vars: From b903e89f23309c269efd654ef2aa2782e14f9e3a Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 17 May 2019 08:18:04 -0600 Subject: [PATCH 07/15] mfdataset encoding test. --- xarray/tests/test_backends.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a4c0374e158..f2e2e46660e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2474,6 +2474,26 @@ def test_attrs_mfdataset(self): 'no attribute'): actual.test2 + def test_encoding_mfdataset(self): + original = Dataset({'foo': ('t', np.random.randn(10)), + 't': ('t', pd.date_range(start='2010-01-01', + periods=10, + freq='1D'))}) + original.t.encoding['units'] = 'days since 2010-01-01' + + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + ds1 = original.isel(t=slice(5)) + ds2 = original.isel(t=slice(5, 10)) + ds1.t.encoding['units'] = 'days since 2010-01-01' + ds2.t.encoding['units'] = 'days since 2000-01-01' + ds1.to_netcdf(tmp1) + ds2.to_netcdf(tmp2) + with open_mfdataset([tmp1, tmp2]) as actual: + assert actual.t.encoding['units'] == original.t.encoding['units'] # noqa + assert actual.t.encoding['units'] == ds1.t.encoding['units'] # noqa + assert actual.t.encoding['units'] != ds2.t.encoding['units'] # noqa + def test_preprocess_mfdataset(self): original = Dataset({'foo': ('x', np.random.randn(10))}) with create_tmp_file() as tmp: From 70c8c5cd6014ed27299c77a4ba5c3449756ed591 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 17 May 2019 08:39:47 -0600 Subject: [PATCH 08/15] time_bounds attrs test + allow for slight CF non-compliance. --- xarray/conventions.py | 5 ++-- xarray/tests/test_coding_times.py | 40 +++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 345e45b7b30..9fb45fa3795 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -632,7 +632,8 @@ def cf_encoder(variables, attributes): for attr in ['units', 'standard_name', 'axis', 'positive', 'calendar', 'long_name', 'leap_month', 'leap_year', 'month_lengths']: - if attr in new_vars[bounds].attrs: - new_vars[bounds].attrs.pop(attr) + if attr in new_vars[bounds].attrs and attr in var.attrs: + if new_vars[bounds].attrs[attr] == var.attrs[attr]: + new_vars[bounds].attrs.pop(attr) return new_vars, attributes diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d40abd4acc3..575c35a4ae0 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -5,11 +5,12 @@ import pandas as pd import pytest -from xarray import DataArray, Variable, coding, decode_cf +from xarray import DataArray, Dataset, Variable, coding, decode_cf +from xarray.backends.api import open_dataset, open_mfdataset from xarray.coding.times import ( _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime) from xarray.coding.variables import SerializationWarning -from xarray.conventions import _update_bounds_attributes +from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal @@ -671,6 +672,41 @@ def test_decode_cf_time_bounds(): _update_bounds_attributes(ds.variables) +def test_encode_time_bounds(): + # create time and time_bounds DataArrays for Jan-1850 and Feb-1850 + time_bounds_vals = np.array([[0.0, 31.0], [31.0, 59.0]], + dtype=np.int64) + time_vals = time_bounds_vals.mean(axis=1) + + time_var = DataArray(time_vals, dims='time', + coords={'time': time_vals}) + time_bounds_var = DataArray(time_bounds_vals, dims=('time', 'd2'), + coords={'time': time_vals}) + + # create Dataset of time and time_bounds + ds = Dataset(coords={'time': time_var}, + data_vars={'time_bounds': time_bounds_var}) + ds.time.attrs = {'bounds': 'time_bounds', 'calendar': 'noleap', + 'units': 'days since 1850-01-01'} + + # if time_bounds attrs are same as time attrs, pop them. + ds.time_bounds.attrs = {'calendar': 'noleap', + 'units': 'days since 1850-01-01'} + encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, + ds.attrs) + assert 'calendar' not in encoded['time_bounds'].attrs + assert 'units' not in encoded['time_bounds'].attrs + + # for CF-noncompliant case of time_bounds attrs being different from + # time attrs; preserve them for faithful roundtrip + ds.time_bounds.attrs = {'calendar': 'noleap', + 'units': 'days since 1849-01-01'} + encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, + ds.attrs) + assert 'calendar' not in encoded['time_bounds'].attrs + assert encoded['time_bounds'].attrs['units'] == ds.time_bounds.attrs['units'] # noqa + + @pytest.fixture(params=_ALL_CALENDARS) def calendar(request): return request.param From d637e9ec4e5b9c8e6837afd2ae146cd57fa09fcc Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 18 May 2019 20:31:27 -0600 Subject: [PATCH 09/15] I need to deal with encoding! --- xarray/conventions.py | 42 +++++++++++++++++++++++++ xarray/tests/test_coding_times.py | 51 ++++++++++++++++++------------- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 9fb45fa3795..3a13b5a6beb 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -7,6 +7,7 @@ from .coding import strings, times, variables from .coding.variables import SerializationWarning from .core import duck_array_ops, indexing +from .core.common import contains_cftime_datetimes from .core.pycompat import dask_array_type from .core.variable import IndexVariable, Variable, as_variable @@ -353,6 +354,43 @@ def _update_bounds_attributes(variables): if 'calendar' in attrs: bounds_attrs.setdefault('calendar', attrs['calendar']) +def _update_bounds_encoding(variables): + """Adds time encoding to time bounds variables. + + Variables handling time bounds ("Cell boundaries" in the CF + conventions) do not necessarily carry the necessary attributes to be + decoded. This copies the encoding from the time variable to the + associated bounds variable so that we write CF-compliant files. + + See Also: + + http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/ + cf-conventions.html#cell-boundaries + + https://github.com/pydata/xarray/issues/2565 + """ + + # For all time variables with bounds + for v in variables.values(): + attrs = v.attrs + encoding = v.encoding + has_date_units = 'units' in encoding and 'since' in encoding['units'] + is_datetime_type = (np.issubdtype(v.dtype, np.datetime64) or + contains_cftime_datetimes(v)) + if is_datetime_type and not has_date_units and 'bounds' in attrs: + warnings.warn('Variable {0} has datetime units and a ' + 'bounds variable but {0}.encoding does not have ' + 'units specified. Output file might not comply with ' + 'CF-conventions'.format(v.name), + UserWarning) + + + if has_date_units and 'bounds' in attrs: + if attrs['bounds'] in variables: + bounds_encoding = variables[attrs['bounds']].encoding + bounds_encoding.setdefault('units', encoding['units']) + if 'calendar' in encoding: + bounds_encoding.setdefault('calendar', encoding['calendar']) def decode_cf_variables(variables, attributes, concat_characters=True, mask_and_scale=True, decode_times=True, @@ -621,6 +659,10 @@ def cf_encoder(variables, attributes): -------- decode_cf_variable, encode_cf_variable """ + + # add encoding for time bounds variables so that they are encoded correctly. + _update_bounds_encoding(variables) + new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) for k, v in variables.items()) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 575c35a4ae0..10c88861159 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -673,39 +673,46 @@ def test_decode_cf_time_bounds(): def test_encode_time_bounds(): - # create time and time_bounds DataArrays for Jan-1850 and Feb-1850 - time_bounds_vals = np.array([[0.0, 31.0], [31.0, 59.0]], - dtype=np.int64) - time_vals = time_bounds_vals.mean(axis=1) - - time_var = DataArray(time_vals, dims='time', - coords={'time': time_vals}) - time_bounds_var = DataArray(time_bounds_vals, dims=('time', 'd2'), - coords={'time': time_vals}) - - # create Dataset of time and time_bounds - ds = Dataset(coords={'time': time_var}, - data_vars={'time_bounds': time_bounds_var}) - ds.time.attrs = {'bounds': 'time_bounds', 'calendar': 'noleap', - 'units': 'days since 1850-01-01'} - - # if time_bounds attrs are same as time attrs, pop them. - ds.time_bounds.attrs = {'calendar': 'noleap', - 'units': 'days since 1850-01-01'} + + time = pd.date_range('2000-01-16', periods=1) + time_bounds = pd.date_range('2000-01-01', periods=2, freq='MS') + ds = Dataset(dict(time=time, time_bounds=time_bounds)) + ds.time.attrs = {'bounds': 'time_bounds'} + ds.time.encoding = {'calendar': 'noleap', + 'units': 'days since 2000-01-01'} + + expected = dict() + # expected['time'] = Variable(data=np.array([15]), dims=['time']) + expected['time_bounds'] = Variable(data=np.array([0, 31]), dims=['time_bounds']) + + encoded, _ = cf_encoder(ds.variables, ds.attrs) + assert_equal(encoded['time_bounds'], expected['time_bounds']) + assert 'calendar' not in encoded['time_bounds'].attrs + assert 'units' not in encoded['time_bounds'].attrs + + # if time_bounds attrs are same as time attrs, it doesn't matter + ds.time_bounds.encoding = {'calendar': 'noleap', + 'units': 'days since 2000-01-01'} encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, ds.attrs) + assert_equal(encoded['time_bounds'], expected['time_bounds']) assert 'calendar' not in encoded['time_bounds'].attrs assert 'units' not in encoded['time_bounds'].attrs # for CF-noncompliant case of time_bounds attrs being different from # time attrs; preserve them for faithful roundtrip - ds.time_bounds.attrs = {'calendar': 'noleap', - 'units': 'days since 1849-01-01'} + ds.time_bounds.encoding = {'calendar': 'noleap', + 'units': 'days since 1849-01-01'} encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, ds.attrs) + with pytest.raises(AssertionError): + assert_equal(encoded['time_bounds'], expected['time_bounds']) assert 'calendar' not in encoded['time_bounds'].attrs - assert encoded['time_bounds'].attrs['units'] == ds.time_bounds.attrs['units'] # noqa + assert encoded['time_bounds'].attrs['units'] == ds.time_bounds.encoding['units'] # noqa + ds.time.encoding = {} + with pytest.warns(UserWarning): + cf_encoder(ds.variables, ds.attrs) @pytest.fixture(params=_ALL_CALENDARS) def calendar(request): From 2f1dd25eba9d188a4cf64a96999793ba4a1b6a94 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 18 May 2019 20:34:11 -0600 Subject: [PATCH 10/15] minor fixes. --- doc/whats-new.rst | 2 +- xarray/conventions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0f8553ef8e1..a2ade467eef 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,7 @@ Enhancements Bug fixes ~~~~~~~~~ -- Don't set attributes on bounds variables when writing to netCDF. +- Don't set encoding attributes on bounds variables when writing to netCDF. :py:meth:`xr.open_mfdataset` sets variable encodings to that of variables in first file.(:issue:`2436`, :issue:`2921`) By `Deepak Cherian `_. diff --git a/xarray/conventions.py b/xarray/conventions.py index 3a13b5a6beb..00fc4d9dff0 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -377,6 +377,7 @@ def _update_bounds_encoding(variables): has_date_units = 'units' in encoding and 'since' in encoding['units'] is_datetime_type = (np.issubdtype(v.dtype, np.datetime64) or contains_cftime_datetimes(v)) + if is_datetime_type and not has_date_units and 'bounds' in attrs: warnings.warn('Variable {0} has datetime units and a ' 'bounds variable but {0}.encoding does not have ' @@ -384,7 +385,6 @@ def _update_bounds_encoding(variables): 'CF-conventions'.format(v.name), UserWarning) - if has_date_units and 'bounds' in attrs: if attrs['bounds'] in variables: bounds_encoding = variables[attrs['bounds']].encoding From 12f3e552cc54213da13318991df66551806103dc Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 18 May 2019 20:39:32 -0600 Subject: [PATCH 11/15] another minor fix. --- xarray/conventions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 00fc4d9dff0..d5bb078b555 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -379,10 +379,10 @@ def _update_bounds_encoding(variables): contains_cftime_datetimes(v)) if is_datetime_type and not has_date_units and 'bounds' in attrs: - warnings.warn('Variable {0} has datetime units and a ' - 'bounds variable but {0}.encoding does not have ' - 'units specified. Output file might not comply with ' - 'CF-conventions'.format(v.name), + warnings.warn("Variable '{0}' has datetime units and a " + "bounds variable but {0}.encoding does not have " + "units specified. Output file might not comply with " + "CF-conventions.".format(v.name), UserWarning) if has_date_units and 'bounds' in attrs: From f8789e7fea9e6d434bc5d5b92767b84f15fc3dfe Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 19 May 2019 14:13:44 -0600 Subject: [PATCH 12/15] review fixes. --- xarray/conventions.py | 13 +++++++++---- xarray/tests/test_coding_times.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index d5bb078b555..f4590d2cd02 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -378,11 +378,16 @@ def _update_bounds_encoding(variables): is_datetime_type = (np.issubdtype(v.dtype, np.datetime64) or contains_cftime_datetimes(v)) - if is_datetime_type and not has_date_units and 'bounds' in attrs: - warnings.warn("Variable '{0}' has datetime units and a " + if (is_datetime_type and not has_date_units and + 'bounds' in attrs and attrs['bounds'] in variables): + warnings.warn("Variable '{0}' has datetime type and a " "bounds variable but {0}.encoding does not have " - "units specified. Output file might not comply with " - "CF-conventions.".format(v.name), + "units specified. The units encodings for '{0}' " + "and '{1}' will be determined independently " + "and may not be equal, counter to CF-conventions. " + "If this is a concern, specify a units encoding for " + "'{0}' before writing to a file." + .format(v.name, attrs['bounds']), UserWarning) if has_date_units and 'bounds' in attrs: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 10c88861159..09d7ca14b70 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -672,6 +672,7 @@ def test_decode_cf_time_bounds(): _update_bounds_attributes(ds.variables) +@requires_cftime_or_netCDF4 def test_encode_time_bounds(): time = pd.date_range('2000-01-16', periods=1) From e0c49a4702014ce7ab4d6c161917fb9d9be6d53c Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 19 May 2019 14:17:46 -0600 Subject: [PATCH 13/15] lint fixes. --- xarray/conventions.py | 7 +++++-- xarray/tests/test_coding_times.py | 7 ++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index f4590d2cd02..42ad800ea48 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -354,6 +354,7 @@ def _update_bounds_attributes(variables): if 'calendar' in attrs: bounds_attrs.setdefault('calendar', attrs['calendar']) + def _update_bounds_encoding(variables): """Adds time encoding to time bounds variables. @@ -395,7 +396,9 @@ def _update_bounds_encoding(variables): bounds_encoding = variables[attrs['bounds']].encoding bounds_encoding.setdefault('units', encoding['units']) if 'calendar' in encoding: - bounds_encoding.setdefault('calendar', encoding['calendar']) + bounds_encoding.setdefault('calendar', + encoding['calendar']) + def decode_cf_variables(variables, attributes, concat_characters=True, mask_and_scale=True, decode_times=True, @@ -665,7 +668,7 @@ def cf_encoder(variables, attributes): decode_cf_variable, encode_cf_variable """ - # add encoding for time bounds variables so that they are encoded correctly. + # add encoding for time bounds variables if present. _update_bounds_encoding(variables) new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 09d7ca14b70..7b2ba5f22c8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -6,7 +6,6 @@ import pytest from xarray import DataArray, Dataset, Variable, coding, decode_cf -from xarray.backends.api import open_dataset, open_mfdataset from xarray.coding.times import ( _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime) from xarray.coding.variables import SerializationWarning @@ -16,7 +15,7 @@ from . import ( assert_array_equal, has_cftime, has_cftime_or_netCDF4, has_dask, - requires_cftime_or_netCDF4, requires_cftime) + requires_cftime, requires_cftime_or_netCDF4) try: from pandas.errors import OutOfBoundsDatetime @@ -684,7 +683,8 @@ def test_encode_time_bounds(): expected = dict() # expected['time'] = Variable(data=np.array([15]), dims=['time']) - expected['time_bounds'] = Variable(data=np.array([0, 31]), dims=['time_bounds']) + expected['time_bounds'] = Variable(data=np.array([0, 31]), + dims=['time_bounds']) encoded, _ = cf_encoder(ds.variables, ds.attrs) assert_equal(encoded['time_bounds'], expected['time_bounds']) @@ -715,6 +715,7 @@ def test_encode_time_bounds(): with pytest.warns(UserWarning): cf_encoder(ds.variables, ds.attrs) + @pytest.fixture(params=_ALL_CALENDARS) def calendar(request): return request.param From 34d0e60d325c408fc7b3b7e8961667aaa2ec3bde Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 24 Jun 2019 17:47:55 -0400 Subject: [PATCH 14/15] Remove encoding changes and xfail test. --- xarray/backends/api.py | 3 --- xarray/tests/test_backends.py | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 6319c05d690..7c5040580fe 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -724,9 +724,6 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, combined._file_obj = _MultiFileCloser(file_objs) combined.attrs = datasets[0].attrs - for v in combined.variables: - if v in datasets[0]: - combined[v].encoding = datasets[0][v].encoding return combined diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f2e2e46660e..08347b5ed1b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2474,6 +2474,7 @@ def test_attrs_mfdataset(self): 'no attribute'): actual.test2 + @pytest.mark.xfail(reason='mfdataset loses encoding currently.') def test_encoding_mfdataset(self): original = Dataset({'foo': ('t', np.random.randn(10)), 't': ('t', pd.date_range(start='2010-01-01', From c63cf337ddae818460b2609e64d8d6956ff28fee Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 24 Jun 2019 17:51:15 -0400 Subject: [PATCH 15/15] Update whats-new.rst --- doc/whats-new.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a24c0da4445..999aaa47843 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -77,8 +77,7 @@ Bug fixes ~~~~~~~~~ - Don't set encoding attributes on bounds variables when writing to netCDF. - :py:meth:`xr.open_mfdataset` sets variable encodings to that of variables - in first file.(:issue:`2436`, :issue:`2921`) + (:issue:`2921`) By `Deepak Cherian `_. - NetCDF4 output: variables with unlimited dimensions must be chunked (not contiguous) on output. (:issue:`1849`)