pydata · delgadom · Dec 28, 2017 · Dec 28, 2017 · Dec 29, 2017 · Dec 29, 2017
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -66,6 +66,7 @@ Bug fixes
   unintentionally loading the datastores data and attributes repeatedly during
   writes  (:issue:`1798`).
   By `Joe Hamman <https://github.com/jhamman>`_.
+- Handle ``_FillValue`` for variable-length unicode strings when using the netCDF4 backend. The h5netcdf backend still cannot accept ``_FillValue`` for variable-length strings (:issue:`1781`). By `Michael Delgado <https://github.com/delgadom>`_.
 
 
 .. _whats-new.0.10.0:

diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py
@@ -340,15 +340,6 @@ def prepare_variable(self, name, variable, check_encoding=False,
 
         fill_value = attrs.pop('_FillValue', None)
 
-        if datatype is str and fill_value is not None:
-            raise NotImplementedError(
-                'netCDF4 does not yet support setting a fill value for '
-                'variable-length strings '
-                '(https://github.com/Unidata/netcdf4-python/issues/730). '
-                "Either remove '_FillValue' from encoding on variable %r "
-                "or set {'dtype': 'S1'} in encoding to use the fixed width "
-                'NC_CHAR type.' % name)
-
         encoding = _extract_nc4_variable_encoding(
             variable, raise_on_invalid=check_encoding,
             unlimited_dims=unlimited_dims)

diff --git a/xarray/conventions.py b/xarray/conventions.py
@@ -949,7 +949,7 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
 
     original_dtype = data.dtype
 
-    if concat_characters and data.dtype.kind == 'S':
+    if concat_characters and data.dtype.kind in ['U', 'S', 'O']:
         if stack_char_dim:
             dimensions = dimensions[:-1]
             data = StackedBytesArray(data)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -828,20 +828,31 @@ def test_roundtrip_string_with_fill_value_vlen(self):
         values = np.array([u'ab', u'cdef', np.nan], dtype=object)
         expected = Dataset({'x': ('t', values)})
 
-        # netCDF4-based backends don't support an explicit fillvalue
+        # The h5netcdf backend doesn't support an explicit _FillValue
         # for variable length strings yet.
-        # https://github.com/Unidata/netcdf4-python/issues/730
         # https://github.com/shoyer/h5netcdf/issues/37
+        # The netCDF4-python backend does accept an explicit _FillValue:
+        # https://github.com/Unidata/netcdf4-python/issues/730
+        # This test exercises both behaviors (GH1802).
         original = Dataset({'x': ('t', values, {}, {'_FillValue': u'XXX'})})
-        with pytest.raises(NotImplementedError):
+        if isinstance(self, H5NetCDFDataTest):
+            with pytest.raises(NotImplementedError):
+                with self.roundtrip(original) as actual:
+                    self.assertDatasetIdentical(expected, actual)
+        else:
             with self.roundtrip(original) as actual:
                 self.assertDatasetIdentical(expected, actual)
 
         original = Dataset({'x': ('t', values, {}, {'_FillValue': u''})})
-        with pytest.raises(NotImplementedError):
+        if isinstance(self, H5NetCDFDataTest):
+            with pytest.raises(NotImplementedError):
+                with self.roundtrip(original) as actual:
+                    self.assertDatasetIdentical(expected, actual)
+        else:
             with self.roundtrip(original) as actual:
                 self.assertDatasetIdentical(expected, actual)
 
+
     def test_roundtrip_character_array(self):
         with create_tmp_file() as tmp_file:
             values = np.array([['a', 'b', 'c'], ['d', 'e', 'f']], dtype='S')