Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DataArray.pad, Dataset.pad, Variable.pad #3596

Merged
merged 25 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7c230aa
add pad method to Variable and add corresponding test
mark-boer Nov 5, 2019
5980234
move pad_with_fill value to dask_array_compat.py and make it default …
mark-boer Nov 18, 2019
b6a979b
add pad method to dataarray
mark-boer Nov 20, 2019
80abc3a
add docstrings for variable.pad and dataarray.pad
mark-boer Nov 28, 2019
ed3d88e
add tests for DataArray.pad
mark-boer Dec 3, 2019
d4e484d
improve pad method signature and support dictionaries as pad_options …
mark-boer Dec 4, 2019
65d7495
fix linting errors and remove typo from tests
mark-boer Dec 8, 2019
0d7f1a7
implement suggested changes: pad_width => padwidths, use pytest.mark.…
mark-boer Dec 8, 2019
1ee2950
move pad method to dataset
mark-boer Dec 28, 2019
11023c3
add helper function to variable.pad and fix some mypy errors
mark-boer Dec 29, 2019
3aae4ba
add some more tests for DataArray.pad and add docstrings to all pad m…
mark-boer Dec 31, 2019
742487e
Merge branch 'master' into feature/dataarray_pad
mark-boer Dec 31, 2019
314f007
add workaround for dask.pad mode=mean that converts integers to float…
mark-boer Jan 1, 2020
7515478
disable linear_ramp test and add pad to whats-new.rst and api.rst
mark-boer Jan 25, 2020
ba3f0a4
Merge branch 'master' into feature/dataarray_pad
mark-boer Jan 25, 2020
855c39e
fix small merge issue in test_unit
mark-boer Jan 26, 2020
d507d1d
fix DataArray.pad and Dataset.pad docstrings
mark-boer Jan 26, 2020
64ac8a2
implement suggested changes from code review: add option of integer p…
mark-boer Feb 12, 2020
71e11bb
apply isort and set linear_ramp to xfail
mark-boer Feb 12, 2020
7060b07
Minor fixes.
dcherian Mar 5, 2020
588ff03
Merge remote-tracking branch 'upstream/master' into feature/dataarray…
mark-boer Mar 8, 2020
3e6f792
fix merge issue and make some minor changes as suggested in the code …
mark-boer Mar 8, 2020
6958da9
fix test_unit.test_pad_constant_values
mark-boer Mar 8, 2020
af0a4a1
Keewis review comments
dcherian Mar 18, 2020
f781f72
Add experimental warning
dcherian Mar 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions xarray/core/dask_array_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,63 @@ def meta_from_array(x, ndim=None, dtype=None):
meta = meta.astype(dtype)

return meta


# TODO figure out how Dask versioning works
# if LooseVersion(dask_version) >= LooseVersion("1.7.0"):
mark-boer marked this conversation as resolved.
Show resolved Hide resolved
try:
    pad = da.pad
except AttributeError:

    def pad(array, pad_width, mode="constant", **kwargs):
        """
        Return a new dask array padded with a constant value.

        Stand-in used when ``da.pad`` is not available. Only
        ``mode="constant"`` is handled here.

        Parameters
        ----------
        array : dask array
            Array to pad. Its ``shape``, ``chunks`` and ``dtype`` are read.
        pad_width : sequence of (before, after)
            One entry per axis, in axis order: the number of values added
            before and after the existing data along that axis.
        mode : str
            Must be ``"constant"``.
        **kwargs
            Only ``constant_values`` is read. When it is given, it is used
            as the fill value and the array dtype is kept. When it is
            missing, both the fill value and the dtype come from
            ``dtypes.maybe_promote(array.dtype)``.

        Raises
        ------
        NotImplementedError
            If ``mode`` is anything other than ``"constant"``.
        """
        if mode != "constant":
            raise NotImplementedError(
                "Pad is not yet implemented for your current version of Dask. "
                "Please update your version of Dask or use the "
                "mode=`constant`, that is added by xarray."
            )

        if "constant_values" in kwargs:
            fill_value = kwargs["constant_values"]
            dtype = array.dtype
        else:
            dtype, fill_value = dtypes.maybe_promote(array.dtype)

        def _constant_block(shape, chunks, axis, width):
            # Same shape/chunking as the current array, except along `axis`
            # where the block is a single chunk of `width` values.
            block_shape = list(shape)
            block_shape[axis] = width
            block_chunks = list(chunks)
            block_chunks[axis] = (width,)
            return da.full(block_shape, fill_value, dtype=dtype, chunks=block_chunks)

        padded = array
        for axis, widths in enumerate(pad_width):
            n_before, n_after = widths[0], widths[1]
            # Re-read on every pass: earlier axes may already have grown.
            shape, chunks = padded.shape, padded.chunks

            pieces = []
            if n_before > 0:
                pieces.append(_constant_block(shape, chunks, axis, n_before))
            pieces.append(padded)
            if n_after > 0:
                pieces.append(_constant_block(shape, chunks, axis, n_after))

            # Nothing to concatenate when both widths are non-positive.
            if len(pieces) > 1:
                padded = da.concatenate(pieces, axis=axis)

        return padded
152 changes: 152 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3110,6 +3110,158 @@ def map_blocks(

return map_blocks(func, self, args, kwargs)

def pad(
    self,
    pad_widths: Mapping[Hashable, Tuple[int, int]] = None,
    mode: str = "constant",
    stat_length: Union[
        int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
    ] = None,
    constant_values: Union[
        int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
    ] = None,
    end_values: Union[
        int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
    ] = None,
    reflect_type: str = None,
    **pad_widths_kwargs: Any,
) -> "DataArray":
    """Pad this array along one or more dimensions.

    When using one of the modes ("edge", "reflect", "symmetric", "wrap"),
    coordinates will be padded with the same mode, otherwise coordinates
    are padded using the "constant" mode with the default fill value of
    ``Variable.pad``.

    Every coordinate that lies along a padded dimension is padded, not
    only the index coordinate that shares the dimension's name, so that
    coordinate shapes stay consistent with the padded data.

    Parameters
    ----------
    pad_widths : Mapping with the form of {dim: (pad_before, pad_after)}
        Number of values padded along each dimension.
    mode : str (taken from numpy docs)
        One of the following string values or a user supplied function.
        'constant' (default)
            Pads with a constant value.
        'edge'
            Pads with the edge values of array.
        'linear_ramp'
            Pads with the linear ramp between end_value and the
            array edge value.
        'maximum'
            Pads with the maximum value of all or part of the
            vector along each axis.
        'mean'
            Pads with the mean value of all or part of the
            vector along each axis.
        'median'
            Pads with the median value of all or part of the
            vector along each axis.
        'minimum'
            Pads with the minimum value of all or part of the
            vector along each axis.
        'reflect'
            Pads with the reflection of the vector mirrored on
            the first and last values of the vector along each
            axis.
        'symmetric'
            Pads with the reflection of the vector mirrored
            along the edge of the array.
        'wrap'
            Pads with the wrap of the vector along the axis.
            The first values are used to pad the end and the
            end values are used to pad the beginning.
    stat_length : sequence or int, optional
        Used in 'maximum', 'mean', 'median', and 'minimum'. Number of
        values at edge of each axis used to calculate the statistic value.
        ((before_1, after_1), ... (before_N, after_N)) unique statistic
        lengths for each axis.
        ((before, after),) yields same before and after statistic lengths
        for each axis.
        (stat_length,) or int is a shortcut for before = after = statistic
        length for all axes.
        Default is ``None``, to use the entire axis.
    constant_values : sequence or scalar, optional
        Used in 'constant'. The values to set the padded values for each
        axis.
        ``((before_1, after_1), ... (before_N, after_N))`` unique pad
        constants for each axis.
        ``((before, after),)`` yields same before and after constants for
        each axis.
        ``(constant,)`` or ``constant`` is a shortcut for
        ``before = after = constant`` for all axes.
        Default is ``None``, in which case the fill value is chosen by
        ``Variable.pad``.
    end_values : sequence or scalar, optional
        Used in 'linear_ramp'. The values used for the ending value of the
        linear_ramp and that will form the edge of the padded array.
        ``((before_1, after_1), ... (before_N, after_N))`` unique end
        values for each axis.
        ``((before, after),)`` yields same before and after end values for
        each axis.
        ``(constant,)`` or ``constant`` is a shortcut for
        ``before = after = constant`` for all axes.
        Default is ``None``, in which case the end value is chosen by
        ``Variable.pad``.
    reflect_type : {'even', 'odd'}, optional
        Used in 'reflect', and 'symmetric'. The 'even' style is the
        default with an unaltered reflection around the edge value. For
        the 'odd' style, the extended part of the array is created by
        subtracting the reflected values from two times the edge value.
    **pad_widths_kwargs:
        The keyword arguments form of ``pad_widths``.
        One of pad_widths or pad_widths_kwargs must be provided.

    Returns
    -------
    padded : DataArray
        DataArray with the padded coordinates and data.

    See also
    --------
    shift
    roll

    Examples
    --------

    >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])])
    >>> arr.pad(x=(1, 2), constant_values=0)
    <xarray.DataArray (x: 6)>
    array([0, 5, 6, 7, 0, 0])
    Coordinates:
      * x        (x) float64 nan 0.0 1.0 2.0 nan nan
    """
    pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad")

    variable = self.variable.pad(
        pad_widths=pad_widths,
        mode=mode,
        stat_length=stat_length,
        constant_values=constant_values,
        end_values=end_values,
        reflect_type=reflect_type,
    )

    # Only the modes that repeat existing values make sense for coordinate
    # labels; any other mode falls back to a plain constant pad with the
    # default options of Variable.pad.
    if mode in ("edge", "reflect", "symmetric", "wrap"):
        coord_pad_mode = mode
        coord_pad_options = {
            "stat_length": stat_length,
            "constant_values": constant_values,
            "end_values": end_values,
            "reflect_type": reflect_type,
        }
    else:
        coord_pad_mode = "constant"
        coord_pad_options = {}

    coords = {}
    for name, coord in self.coords.items():
        # Select widths by the coordinate's *dimensions*, not by its name:
        # a non-index or multi-dimensional coordinate along a padded
        # dimension must grow as well, otherwise its shape no longer
        # matches the padded data.
        coord_pad_widths = {
            dim: width for dim, width in pad_widths.items() if dim in coord.dims
        }
        if coord_pad_widths:
            coords[name] = coord.variable.pad(
                coord_pad_widths, mode=coord_pad_mode, **coord_pad_options
            )
        else:
            coords[name] = as_variable(coord, name=name)

    return self._replace(variable=variable, coords=coords)

# this needs to be at the end, or mypy will confuse with `str`
# https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
str = property(StringAccessor)
Expand Down
4 changes: 4 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@

try:
import dask.array as dask_array
from . import dask_array_compat
except ImportError:
dask_array = None # type: ignore
dask_array_compat = None


def _dask_or_eager_func(
Expand Down Expand Up @@ -454,3 +456,5 @@ def rolling_window(array, axis, window, center, fill_value):
return dask_array_ops.rolling_window(array, axis, window, center, fill_value)
else: # np.ndarray
return nputils.rolling_window(array, axis, window, center, fill_value)

# `pad` is built with `_dask_or_eager_func`, passing `dask_array_compat` as the
# dask module rather than `dask.array` itself.
# NOTE(review): `dask_array_compat` is set to None when the dask import above
# fails — presumably `_dask_or_eager_func` tolerates that; confirm.
pad = _dask_or_eager_func("pad", dask_module=dask_array_compat)
dcherian marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion xarray/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def _bottleneck_reduce(self, func, **kwargs):
else:
shift = (-self.window // 2) + 1
valid = (slice(None),) * axis + (slice(-shift, None),)
padded = padded.pad_with_fill_value({self.dim: (0, -shift)})
padded = padded.pad({self.dim: (0, -shift)}, mode="constant")

if isinstance(padded.data, dask_array_type):
raise AssertionError("should not be reachable")
Expand Down
Loading