diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 728e40d4409..dad88a7cb46 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,6 +66,7 @@ Bug fixes unintentionally loading the datastores data and attributes repeatedly during writes (:issue:`1798`). By `Joe Hamman `_. +- Handle ``_FillValue`` for variable-length unicode strings using the netCDF4 backend. The h5netcdf backend still cannot accept ``_FillValue`` for variable-length strings (:issue:`1781`). By `Michael Delgado `_. .. _whats-new.0.10.0: diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d8aa33f35dc..a71c1ce8701 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -340,15 +340,6 @@ def prepare_variable(self, name, variable, check_encoding=False, fill_value = attrs.pop('_FillValue', None) - if datatype is str and fill_value is not None: - raise NotImplementedError( - 'netCDF4 does not yet support setting a fill value for ' - 'variable-length strings ' - '(https://github.com/Unidata/netcdf4-python/issues/730). ' - "Either remove '_FillValue' from encoding on variable %r " - "or set {'dtype': 'S1'} in encoding to use the fixed width " - 'NC_CHAR type.' 
% name) - encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, unlimited_dims=unlimited_dims) diff --git a/xarray/conventions.py b/xarray/conventions.py index 5b951ff694b..5c87eab0f47 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -949,7 +949,7 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True, original_dtype = data.dtype - if concat_characters and data.dtype.kind == 'S': + if concat_characters and data.dtype.kind in ['U', 'S', 'O']: if stack_char_dim: dimensions = dimensions[:-1] data = StackedBytesArray(data) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6b0cd59eb9e..af447ab4207 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -828,20 +828,31 @@ def test_roundtrip_string_with_fill_value_vlen(self): values = np.array([u'ab', u'cdef', np.nan], dtype=object) expected = Dataset({'x': ('t', values)}) - # netCDF4-based backends don't support an explicit fillvalue + # H5netcdf backends don't support an explicit fillvalue # for variable length strings yet. 
- # https://github.com/Unidata/netcdf4-python/issues/730 # https://github.com/shoyer/h5netcdf/issues/37 + # The netCDF4-python backend does accept an explicit _FillValue: + # https://github.com/Unidata/netcdf4-python/issues/730 + # This tests both of those states (:issue:`1802`) original = Dataset({'x': ('t', values, {}, {'_FillValue': u'XXX'})}) - with pytest.raises(NotImplementedError): + if isinstance(self, H5NetCDFDataTest): + with pytest.raises(NotImplementedError): + with self.roundtrip(original) as actual: + self.assertDatasetIdentical(expected, actual) + else: with self.roundtrip(original) as actual: self.assertDatasetIdentical(expected, actual) original = Dataset({'x': ('t', values, {}, {'_FillValue': u''})}) - with pytest.raises(NotImplementedError): + if isinstance(self, H5NetCDFDataTest): + with pytest.raises(NotImplementedError): + with self.roundtrip(original) as actual: + self.assertDatasetIdentical(expected, actual) + else: with self.roundtrip(original) as actual: self.assertDatasetIdentical(expected, actual) + def test_roundtrip_character_array(self): with create_tmp_file() as tmp_file: values = np.array([['a', 'b', 'c'], ['d', 'e', 'f']], dtype='S')