[WIP] Custom fill value for reindex, align, and merge operations #2920

Merged: 8 commits, May 5, 2019
25 changes: 17 additions & 8 deletions xarray/core/alignment.py
@@ -8,7 +8,7 @@
import numpy as np
import pandas as pd

from . import utils
from . import utils, dtypes
from .indexing import get_indexer_nd
from .utils import is_dict_like, is_full_slice
from .variable import IndexVariable, Variable
@@ -35,16 +35,17 @@ def _get_joiner(join):


def align(*objects, **kwargs):
"""align(*objects, join='inner', copy=True, indexes=None,
exclude=frozenset())
"""align(*objects, join='inner', copy=True, fill_value=dtypes.NA,
indexes=None, exclude=frozenset())
Review comment from @shoyer (Member), Apr 26, 2019:

One small additional clean-up you could optionally do here: since we no longer support Python 2, these function signatures could be moved from the docstrings onto the functions themselves, e.g.,

def align(*objects, join='inner', copy=True, fill_value=dtypes.NA,
          indexes=None, exclude=frozenset()):

This would let you drop the calls to kwargs.pop() below, and would be a small win for efficiency/clean code.
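
A minimal standalone sketch of the suggested clean-up, with a stand-in ``NA`` object in place of ``xarray.core.dtypes.NA`` and illustrative function names (not part of this diff):

# Stand-in for xarray.core.dtypes.NA, used only to make this sketch runnable.
NA = object()

# Python-2-compatible pattern used in the diff: the signature lives in the
# docstring and keywords are pulled out of **kwargs by hand.
def align_kwargs_style(*objects, **kwargs):
    join = kwargs.pop('join', 'inner')
    copy = kwargs.pop('copy', True)
    fill_value = kwargs.pop('fill_value', NA)
    indexes = kwargs.pop('indexes', None)
    exclude = kwargs.pop('exclude', frozenset())
    if kwargs:
        raise TypeError('align() got unexpected keyword arguments %r'
                        % list(kwargs))
    return join, copy, fill_value, indexes, exclude

# Python-3-only alternative suggested above: keyword-only arguments make the
# defaults part of the real signature, so no kwargs.pop() calls are needed.
def align_keyword_only(*objects, join='inner', copy=True, fill_value=NA,
                       indexes=None, exclude=frozenset()):
    return join, copy, fill_value, indexes, exclude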


Given any number of Dataset and/or DataArray objects, returns new
objects with aligned indexes and dimension sizes.

Arrays from the aligned objects are suitable as input to mathematical
operators, because along each dimension they have the same index and size.

Missing values (if ``join != 'inner'``) are filled with NaN.
Missing values (if ``join != 'inner'``) are filled with ``fill_value``.
The default fill value is NaN.

Parameters
----------
@@ -65,6 +66,8 @@ def align(*objects, **kwargs):
``copy=False`` and reindexing is unnecessary, or can be performed with
only slice operations, then the output may share memory with the input.
In either case, new xarray objects are always returned.
fill_value : scalar, optional
Value to use for newly missing values
exclude : sequence of str, optional
Dimensions that must be excluded from alignment
indexes : dict-like, optional
@@ -84,6 +87,7 @@ def align(*objects, **kwargs):
"""
join = kwargs.pop('join', 'inner')
copy = kwargs.pop('copy', True)
fill_value = kwargs.pop('fill_value', dtypes.NA)
indexes = kwargs.pop('indexes', None)
exclude = kwargs.pop('exclude', _DEFAULT_EXCLUDE)
if indexes is None:
@@ -162,15 +166,17 @@ def align(*objects, **kwargs):
# fast path for no reindexing necessary
new_obj = obj.copy(deep=copy)
else:
new_obj = obj.reindex(copy=copy, **valid_indexers)
new_obj = obj.reindex(copy=copy, fill_value=fill_value,
**valid_indexers)
new_obj.encoding = obj.encoding
result.append(new_obj)

return tuple(result)


def deep_align(objects, join='inner', copy=True, indexes=None,
exclude=frozenset(), raise_on_invalid=True):
exclude=frozenset(), raise_on_invalid=True,
fill_value=dtypes.NA):
"""Align objects for merging, recursing into dictionary values.

This function is not public API.
@@ -214,7 +220,7 @@ def is_alignable(obj):
out.append(variables)

aligned = align(*targets, join=join, copy=copy, indexes=indexes,
exclude=exclude)
exclude=exclude, fill_value=fill_value)

for position, key, aligned_obj in zip(positions, keys, aligned):
if key is no_key:
@@ -270,6 +276,7 @@ def reindex_variables(
method: Optional[str] = None,
tolerance: Any = None,
copy: bool = True,
fill_value: Optional[Any] = dtypes.NA,
) -> 'Tuple[OrderedDict[Any, Variable], OrderedDict[Any, pd.Index]]':
"""Conform a dictionary of aligned variables onto a new set of variables,
filling in missing values with NaN.
@@ -305,6 +312,8 @@ def reindex_variables(
``copy=False`` and reindexing is unnecessary, or can be performed
with only slice operations, then the output may share memory with
the input. In either case, new xarray objects are always returned.
fill_value : scalar, optional
Value to use for newly missing values

Returns
-------
@@ -380,7 +389,7 @@ def reindex_variables(
needs_masking = any(d in masked_dims for d in var.dims)

if needs_masking:
new_var = var._getitem_with_mask(key)
new_var = var._getitem_with_mask(key, fill_value=fill_value)
elif all(is_full_slice(k) for k in key):
# no reindexing necessary
# here we need to manually deal with copying data, since
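For reference, a small usage sketch of the keyword added above as it flows from ``align`` through ``reindex_variables`` to ``Variable._getitem_with_mask``; the results are noted in comments as expectations under this change, not guaranteed output:

import xarray as xr

x = xr.DataArray([1, 2], dims='a', coords={'a': [0, 1]})
y = xr.DataArray([3, 4], dims='a', coords={'a': [1, 2]})

# Default behaviour: an outer join fills non-overlapping labels with NaN
# (promoting the integer data to float).
x_nan, y_nan = xr.align(x, y, join='outer')

# With this change the fill value can be chosen explicitly.
x_fill, y_fill = xr.align(x, y, join='outer', fill_value=-1)
# x_fill is expected to hold [1, 2, -1] and y_fill [-1, 3, 4]
# on the combined index a = [0, 1, 2].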
22 changes: 14 additions & 8 deletions xarray/core/dataarray.py
@@ -879,9 +879,10 @@ def sel_points(self, dim='points', method=None, tolerance=None,
dim=dim, method=method, tolerance=tolerance, **indexers)
return self._from_temp_dataset(ds)

def reindex_like(self, other, method=None, tolerance=None, copy=True):
"""Conform this object onto the indexes of another object, filling
in missing values with NaN.
def reindex_like(self, other, method=None, tolerance=None, copy=True,
fill_value=dtypes.NA):
"""Conform this object onto the indexes of another object, filling in
missing values with ``fill_value``. The default fill value is NaN.

Parameters
----------
@@ -910,6 +911,8 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True):
``copy=False`` and reindexing is unnecessary, or can be performed
with only slice operations, then the output may share memory with
the input. In either case, a new xarray object is always returned.
fill_value : scalar, optional
Value to use for newly missing values

Returns
-------
@@ -924,12 +927,12 @@
"""
indexers = reindex_like_indexers(self, other)
return self.reindex(method=method, tolerance=tolerance, copy=copy,
**indexers)
fill_value=fill_value, **indexers)

def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
**indexers_kwargs):
"""Conform this object onto a new set of indexes, filling in
missing values with NaN.
fill_value=dtypes.NA, **indexers_kwargs):
"""Conform this object onto the indexes of another object, filling in
missing values with ``fill_value``. The default fill value is NaN.

Parameters
----------
@@ -956,6 +959,8 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
fill_value : scalar, optional
Value to use for newly missing values
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.
@@ -974,7 +979,8 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
indexers = either_dict_or_kwargs(
indexers, indexers_kwargs, 'reindex')
ds = self._to_temp_dataset().reindex(
indexers=indexers, method=method, tolerance=tolerance, copy=copy)
indexers=indexers, method=method, tolerance=tolerance, copy=copy,
fill_value=fill_value)
return self._from_temp_dataset(ds)

def interp(self, coords=None, method='linear', assume_sorted=False,
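A short illustration of the DataArray entry points changed above, assuming the new keyword; expected values are only noted in comments:

import xarray as xr

foo = xr.DataArray([10, 20], dims='y', coords={'y': [0, 1]})
bar = xr.DataArray([10, 20, 30], dims='y', coords={'y': [0, 1, 2]})

# Reindex onto a larger index, filling the new label with 0 instead of NaN.
expanded = foo.reindex(y=[0, 1, 2], fill_value=0)
# expected values: [10, 20, 0]

# reindex_like computes the indexers from `bar` and forwards fill_value to
# reindex, so the result should match the call above.
matched = foo.reindex_like(bar, fill_value=0)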
25 changes: 16 additions & 9 deletions xarray/core/dataset.py
@@ -1932,9 +1932,10 @@ def sel_points(self, dim='points', method=None, tolerance=None,
)
return self.isel_points(dim=dim, **pos_indexers)

def reindex_like(self, other, method=None, tolerance=None, copy=True):
"""Conform this object onto the indexes of another object, filling
in missing values with NaN.
def reindex_like(self, other, method=None, tolerance=None, copy=True,
fill_value=dtypes.NA):
"""Conform this object onto the indexes of another object, filling in
missing values with ``fill_value``. The default fill value is NaN.

Parameters
----------
@@ -1963,6 +1964,8 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True):
``copy=False`` and reindexing is unnecessary, or can be performed
with only slice operations, then the output may share memory with
the input. In either case, a new xarray object is always returned.
fill_value : scalar, optional
Value to use for newly missing values

Returns
-------
@@ -1977,12 +1980,12 @@
"""
indexers = alignment.reindex_like_indexers(self, other)
return self.reindex(indexers=indexers, method=method, copy=copy,
tolerance=tolerance)
fill_value=fill_value, tolerance=tolerance)

def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
**indexers_kwargs):
fill_value=dtypes.NA, **indexers_kwargs):
"""Conform this object onto a new set of indexes, filling in
missing values with NaN.
missing values with ``fill_value``. The default fill value is NaN.

Parameters
----------
@@ -2010,6 +2013,8 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
``copy=False`` and reindexing is unnecessary, or can be performed
with only slice operations, then the output may share memory with
the input. In either case, a new xarray object is always returned.
fill_value : scalar, optional
Value to use for newly missing values
**indexers_kwargs : {dim: indexer, ...}, optional
Keyword arguments in the same form as ``indexers``.
One of indexers or indexers_kwargs must be provided.
@@ -2034,7 +2039,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,

variables, indexes = alignment.reindex_variables(
self.variables, self.sizes, self.indexes, indexers, method,
tolerance, copy=copy)
tolerance, copy=copy, fill_value=fill_value)
coord_names = set(self._coord_names)
coord_names.update(indexers)
return self._replace_with_new_dims(
@@ -2752,7 +2757,7 @@ def update(self, other, inplace=None):
inplace=inplace)

def merge(self, other, inplace=None, overwrite_vars=frozenset(),
compat='no_conflicts', join='outer'):
compat='no_conflicts', join='outer', fill_value=dtypes.NA):
"""Merge the arrays of two datasets into a single dataset.

This method generally does not allow for overriding data, with the exception
@@ -2790,6 +2795,8 @@ def merge(self, other, inplace=None, overwrite_vars=frozenset(),
- 'left': use indexes from ``self``
- 'right': use indexes from ``other``
- 'exact': error instead of aligning non-equal indexes
fill_value : scalar, optional
Value to use for newly missing values

Returns
-------
@@ -2804,7 +2811,7 @@ def merge(self, other, inplace=None, overwrite_vars=frozenset(),
inplace = _check_inplace(inplace)
variables, coord_names, dims = dataset_merge_method(
self, other, overwrite_vars=overwrite_vars, compat=compat,
join=join)
join=join, fill_value=fill_value)

return self._replace_vars_and_dims(variables, coord_names, dims,
inplace=inplace)
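The Dataset methods gain the same keyword; a sketch of how it reads in practice (variable names and fill values are illustrative):

import xarray as xr

ds = xr.Dataset({'v': ('x', [1.0, 2.0])}, coords={'x': [10, 20]})
other = xr.Dataset({'w': ('x', [3.0, 4.0])}, coords={'x': [20, 30]})

# Dataset.reindex with a custom fill value for the label being added.
ds2 = ds.reindex(x=[10, 20, 30], fill_value=-999.0)

# Dataset.merge aligns with an outer join by default; fill_value now controls
# what the non-overlapping positions are filled with instead of NaN.
merged = ds.merge(other, fill_value=-999.0)
# merged['v'] is expected to be [1.0, 2.0, -999.0] and
# merged['w'] [-999.0, 3.0, 4.0] on x = [10, 20, 30].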
27 changes: 19 additions & 8 deletions xarray/core/merge.py
@@ -4,6 +4,7 @@

import pandas as pd

from . import dtypes
from .alignment import deep_align
from .pycompat import TYPE_CHECKING
from .utils import Frozen
@@ -349,7 +350,7 @@ def expand_and_merge_variables(objs, priority_arg=None):


def merge_coords(objs, compat='minimal', join='outer', priority_arg=None,
indexes=None):
indexes=None, fill_value=dtypes.NA):
"""Merge coordinate variables.

See merge_core below for argument descriptions. This works similarly to
@@ -358,7 +359,8 @@ def merge_coords(objs, compat='minimal', join='outer', priority_arg=None,
"""
_assert_compat_valid(compat)
coerced = coerce_pandas_values(objs)
aligned = deep_align(coerced, join=join, copy=False, indexes=indexes)
aligned = deep_align(coerced, join=join, copy=False, indexes=indexes,
fill_value=fill_value)
expanded = expand_variable_dicts(aligned)
priority_vars = _get_priority_vars(aligned, priority_arg, compat=compat)
variables = merge_variables(expanded, priority_vars, compat=compat)
@@ -404,7 +406,8 @@ def merge_core(objs,
join='outer',
priority_arg=None,
explicit_coords=None,
indexes=None):
indexes=None,
fill_value=dtypes.NA):
"""Core logic for merging labeled objects.

This is not public API.
@@ -423,6 +426,8 @@
An explicit list of variables from `objs` that are coordinates.
indexes : dict, optional
Dictionary with values given by pandas.Index objects.
fill_value : scalar, optional
Value to use for newly missing values

Returns
-------
@@ -442,7 +447,8 @@
_assert_compat_valid(compat)

coerced = coerce_pandas_values(objs)
aligned = deep_align(coerced, join=join, copy=False, indexes=indexes)
aligned = deep_align(coerced, join=join, copy=False, indexes=indexes,
fill_value=fill_value)
expanded = expand_variable_dicts(aligned)

coord_names, noncoord_names = determine_coords(coerced)
@@ -470,7 +476,7 @@
return variables, coord_names, dict(dims)


def merge(objects, compat='no_conflicts', join='outer'):
def merge(objects, compat='no_conflicts', join='outer', fill_value=dtypes.NA):
"""Merge any number of xarray objects into a single Dataset as variables.

Parameters
@@ -492,6 +498,8 @@ def merge(objects, compat='no_conflicts', join='outer'):
of all non-null values.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
fill_value : scalar, optional
Value to use for newly missing values

Returns
-------
@@ -529,15 +537,17 @@ def merge(objects, compat='no_conflicts', join='outer'):
obj.to_dataset() if isinstance(obj, DataArray) else obj
for obj in objects]

variables, coord_names, dims = merge_core(dict_like_objects, compat, join)
variables, coord_names, dims = merge_core(dict_like_objects, compat, join,
fill_value=fill_value)
# TODO: don't always recompute indexes
merged = Dataset._construct_direct(
variables, coord_names, dims, indexes=None)

return merged


def dataset_merge_method(dataset, other, overwrite_vars, compat, join):
def dataset_merge_method(dataset, other, overwrite_vars, compat, join,
fill_value=dtypes.NA):
"""Guts of the Dataset.merge method."""

# we are locked into supporting overwrite_vars for the Dataset.merge
Expand Down Expand Up @@ -565,7 +575,8 @@ def dataset_merge_method(dataset, other, overwrite_vars, compat, join):
objs = [dataset, other_no_overwrite, other_overwrite]
priority_arg = 2

return merge_core(objs, compat, join, priority_arg=priority_arg)
return merge_core(objs, compat, join, priority_arg=priority_arg,
fill_value=fill_value)


def dataset_update_method(dataset, other):
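At the top level, ``xarray.merge`` threads the keyword through ``merge_core`` and ``deep_align`` down to ``align``; a brief sketch with expected values hedged in comments:

import xarray as xr

a = xr.Dataset({'p': ('x', [1, 2])}, coords={'x': [0, 1]})
b = xr.Dataset({'q': ('x', [3, 4])}, coords={'x': [1, 2]})

# The outer join introduces one missing position in each variable; with the
# new keyword those positions are filled with 0 rather than NaN.
merged = xr.merge([a, b], fill_value=0)
# merged['p'] is expected to be [1, 2, 0] and merged['q'] [0, 3, 4]
# on the combined index x = [0, 1, 2].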
24 changes: 24 additions & 0 deletions xarray/tests/test_dataarray.py
@@ -1259,6 +1259,18 @@ def test_reindex_like_no_index(self):
ValueError, 'different size for unlabeled'):
foo.reindex_like(bar)

@pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0])
def test_reindex_like_fill_value(self, fill_value):
foo = DataArray([10, 20], dims='y', coords={'y': [0, 1]})
bar = DataArray([10, 20, 30], dims='y', coords={'y': [0, 1, 2]})
if fill_value == dtypes.NA:
# if we supply the default, we expect the missing value for a
# float array
fill_value = np.nan
actual = foo.reindex_like(bar, fill_value=fill_value)
expected = DataArray([10, 20, fill_value], coords=[('y', [0, 1, 2])])
assert_identical(expected, actual)

@pytest.mark.filterwarnings('ignore:Indexer has dimensions')
def test_reindex_regressions(self):
# regression test for #279
@@ -1286,6 +1298,18 @@ def test_reindex_method(self):
expected = DataArray([10, 20, np.nan], coords=[('y', y)])
assert_identical(expected, actual)

@pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0])
def test_reindex_fill_value(self, fill_value):
x = DataArray([10, 20], dims='y', coords={'y': [0, 1]})
y = [0, 1, 2]
if fill_value == dtypes.NA:
# if we supply the default, we expect the missing value for a
# float array
fill_value = np.nan
actual = x.reindex(y=y, fill_value=fill_value)
expected = DataArray([10, 20, fill_value], coords=[('y', y)])
assert_identical(expected, actual)

def test_rename(self):
renamed = self.dv.rename('bar')
assert_identical(
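A note on the ``dtypes.NA`` branch in the parametrized tests above: the default sentinel triggers dtype promotion, which is why the expected result is built with ``np.nan`` in that case. A sketch, assuming the internal ``xarray.core.dtypes`` module is importable as in the tests:

import xarray as xr
from xarray.core import dtypes

x = xr.DataArray([10, 20], dims='y', coords={'y': [0, 1]})

# With the default sentinel the integer data must be promoted to float so the
# new label can hold NaN; this mirrors the dtypes.NA -> np.nan substitution
# in the tests.
promoted = x.reindex(y=[0, 1, 2], fill_value=dtypes.NA)
# promoted is expected to be [10.0, 20.0, nan]

# An explicit fill value, as in the fill_value=2 and fill_value=2.0 cases,
# avoids the NaN.
filled = x.reindex(y=[0, 1, 2], fill_value=2)
# filled is expected to be [10, 20, 2]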