diff --git a/doc/computation.rst b/doc/computation.rst
index ae5f4bc5c66..d477cb63d72 100644
--- a/doc/computation.rst
+++ b/doc/computation.rst
@@ -462,13 +462,13 @@ Datasets support most of the same methods found on data arrays:
     abs(ds)
 
 Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or
-alternatively you can use :py:meth:`~xarray.Dataset.apply` to apply a function
+alternatively you can use :py:meth:`~xarray.Dataset.map` to map a function
 to each variable in a dataset:
 
 .. ipython:: python
 
     np.sin(ds)
-    ds.apply(np.sin)
+    ds.map(np.sin)
 
 Datasets also use looping over variables for *broadcasting* in binary
 arithmetic. You can do arithmetic between any ``DataArray`` and a dataset:
diff --git a/doc/groupby.rst b/doc/groupby.rst
index 52a27f4f160..f5943703765 100644
--- a/doc/groupby.rst
+++ b/doc/groupby.rst
@@ -35,10 +35,11 @@ Let's create a simple example dataset:
 
 .. ipython:: python
 
-    ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 3))},
-                    coords={'x': [10, 20, 30, 40],
-                            'letters': ('x', list('abba'))})
-    arr = ds['foo']
+    ds = xr.Dataset(
+        {"foo": (("x", "y"), np.random.rand(4, 3))},
+        coords={"x": [10, 20, 30, 40], "letters": ("x", list("abba"))},
+    )
+    arr = ds["foo"]
     ds
 
 If we groupby the name of a variable or coordinate in a dataset (we can also
@@ -93,7 +94,7 @@ Apply
 ~~~~~
 
 To apply a function to each group, you can use the flexible
-:py:meth:`~xarray.DatasetGroupBy.apply` method. The resulting objects are automatically
+:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically
 concatenated back together along the group axis:
 
 .. ipython:: python
@@ -101,7 +102,7 @@ concatenated back together along the group axis:
     def standardize(x):
         return (x - x.mean()) / x.std()
 
-    arr.groupby('letters').apply(standardize)
+    arr.groupby('letters').map(standardize)
 
 GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and
 methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an
@@ -202,7 +203,7 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen
         dims=['ny','nx'])
     da
     da.groupby('lon').sum(...)
-    da.groupby('lon').apply(lambda x: x - x.mean(), shortcut=False)
+    da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False)
 
 Because multidimensional groups have the ability to generate a very large
 number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins`
diff --git a/doc/howdoi.rst b/doc/howdoi.rst
index 721d1323e73..91644ba2718 100644
--- a/doc/howdoi.rst
+++ b/doc/howdoi.rst
@@ -44,7 +44,7 @@ How do I ...
    * - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries
      - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more)
    * - apply a function on all data variables in a Dataset
-     - :py:meth:`Dataset.apply`
+     - :py:meth:`Dataset.map`
    * - write xarray objects with complex values to a netCDF file
      - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True``
    * - make xarray objects look like other xarray objects
diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst
index 7d84199323d..741b3d1a5fe 100644
--- a/doc/quick-overview.rst
+++ b/doc/quick-overview.rst
@@ -142,7 +142,7 @@ xarray supports grouped operations using a very similar API to pandas (see :ref:
     labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels')
     labels
     data.groupby(labels).mean('y')
-    data.groupby(labels).apply(lambda x: x - x.min())
+    data.groupby(labels).map(lambda x: x - x.min())
 
 Plotting
 --------
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 04fe88e9993..b572b1dda67 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -44,6 +44,13 @@ New Features
   option for dropping either labels or variables, but using the more specific methods is encouraged.
   (:pull:`3475`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
+- :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` have been added for 
+  mapping / applying a function over each item in the collection, reflecting the widely used
+  and least surprising name for this operation.
+  The existing ``apply`` methods remain for backward compatibility, though using the ``map``
+  methods is encouraged.
+  (:pull:`3459`)
+  By `Maximilian Roos <https://github.com/max-sixty>`_
 - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`)
   to represent all 'other' dimensions. For example, to move one dimension to the front,
   use `.transpose('x', ...)`. (:pull:`3421`)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index d2d37871ee9..c9ccd88b61e 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -919,7 +919,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray":
         Coordinates:
         * x        (x) <U1 'a' 'b' 'c'
 
-        See also
+        See Also
         --------
         pandas.DataFrame.copy
         """
@@ -1716,7 +1716,7 @@ def stack(
                    codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
                    names=['x', 'y'])
 
-        See also
+        See Also
         --------
         DataArray.unstack
         """
@@ -1764,7 +1764,7 @@ def unstack(
         >>> arr.identical(roundtripped)
         True
 
-        See also
+        See Also
         --------
         DataArray.stack
         """
@@ -1922,6 +1922,11 @@ def drop(
         """Backward compatible method based on `drop_vars` and `drop_sel`
 
         Using either `drop_vars` or `drop_sel` is encouraged
+
+        See Also
+        --------
+        DataArray.drop_vars
+        DataArray.drop_sel
         """
         ds = self._to_temp_dataset().drop(labels, dim, errors=errors)
         return self._from_temp_dataset(ds)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 2cadc90334c..dc5a315e72a 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -3557,6 +3557,11 @@ def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs):
         """Backward compatible method based on `drop_vars` and `drop_sel`
 
         Using either `drop_vars` or `drop_sel` is encouraged
+
+        See Also
+        --------
+        Dataset.drop_vars
+        Dataset.drop_sel
         """
         if errors not in ["raise", "ignore"]:
             raise ValueError('errors must be either "raise" or "ignore"')
@@ -4108,14 +4113,14 @@ def reduce(
             variables, coord_names=coord_names, attrs=attrs, indexes=indexes
         )
 
-    def apply(
+    def map(
         self,
         func: Callable,
         keep_attrs: bool = None,
         args: Iterable[Any] = (),
         **kwargs: Any,
     ) -> "Dataset":
-        """Apply a function over the data variables in this dataset.
+        """Apply a function to each variable in this dataset
 
         Parameters
         ----------
@@ -4135,7 +4140,7 @@ def apply(
         Returns
         -------
         applied : Dataset
-            Resulting dataset from applying ``func`` over each data variable.
+            Resulting dataset from applying ``func`` to each data variable.
 
         Examples
         --------
@@ -4148,7 +4153,7 @@ def apply(
         Data variables:
             foo      (dim_0, dim_1) float64 -0.3751 -1.951 -1.945 0.2948 0.711 -0.3948
             bar      (x) int64 -1 2
-        >>> ds.apply(np.fabs)
+        >>> ds.map(np.fabs)
         <xarray.Dataset>
         Dimensions:  (dim_0: 2, dim_1: 3, x: 2)
         Dimensions without coordinates: dim_0, dim_1, x
@@ -4165,6 +4170,27 @@ def apply(
         attrs = self.attrs if keep_attrs else None
         return type(self)(variables, attrs=attrs)
 
+    def apply(
+        self,
+        func: Callable,
+        keep_attrs: bool = None,
+        args: Iterable[Any] = (),
+        **kwargs: Any,
+    ) -> "Dataset":
+        """
+        Backward compatible implementation of ``map``
+
+        See Also
+        --------
+        Dataset.map
+        """
+        warnings.warn(
+            "Dataset.apply may be deprecated in the future. Using Dataset.map is encouraged",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.map(func, keep_attrs, args, **kwargs)
+
     def assign(
         self, variables: Mapping[Hashable, Any] = None, **variables_kwargs: Hashable
     ) -> "Dataset":
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index c8906e34737..8ae65d9b9df 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -608,7 +608,7 @@ def assign_coords(self, coords=None, **coords_kwargs):
         Dataset.swap_dims
         """
         coords_kwargs = either_dict_or_kwargs(coords, coords_kwargs, "assign_coords")
-        return self.apply(lambda ds: ds.assign_coords(**coords_kwargs))
+        return self.map(lambda ds: ds.assign_coords(**coords_kwargs))
 
 
 def _maybe_reorder(xarray_obj, dim, positions):
@@ -655,8 +655,8 @@ def lookup_order(dimension):
         new_order = sorted(stacked.dims, key=lookup_order)
         return stacked.transpose(*new_order, transpose_coords=self._restore_coord_dims)
 
-    def apply(self, func, shortcut=False, args=(), **kwargs):
-        """Apply a function over each array in the group and concatenate them
+    def map(self, func, shortcut=False, args=(), **kwargs):
+        """Apply a function to each array in the group and concatenate them
         together into a new array.
 
         `func` is called like `func(ar, *args, **kwargs)` for each array `ar`
@@ -702,6 +702,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs):
         applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped)
         return self._combine(applied, shortcut=shortcut)
 
+    def apply(self, func, shortcut=False, args=(), **kwargs):
+        """
+        Backward compatible implementation of ``map``
+
+        See Also
+        --------
+        DataArrayGroupBy.map
+        """
+        warnings.warn(
+            "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.map(func, shortcut=shortcut, args=args, **kwargs)
+
     def _combine(self, applied, restore_coord_dims=False, shortcut=False):
         """Recombine the applied objects like the original."""
         applied_example, applied = peek_at(applied)
@@ -765,7 +780,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
         if dim is None:
             dim = self._group_dim
 
-        out = self.apply(
+        out = self.map(
             self._obj.__class__.quantile,
             shortcut=False,
             q=q,
@@ -820,7 +835,7 @@ def reduce_array(ar):
 
         check_reduce_dims(dim, self.dims)
 
-        return self.apply(reduce_array, shortcut=shortcut)
+        return self.map(reduce_array, shortcut=shortcut)
 
 
 ops.inject_reduce_methods(DataArrayGroupBy)
@@ -828,8 +843,8 @@ def reduce_array(ar):
 
 
 class DatasetGroupBy(GroupBy, ImplementsDatasetReduce):
-    def apply(self, func, args=(), shortcut=None, **kwargs):
-        """Apply a function over each Dataset in the group and concatenate them
+    def map(self, func, args=(), shortcut=None, **kwargs):
+        """Apply a function to each Dataset in the group and concatenate them
         together into a new Dataset.
 
         `func` is called like `func(ds, *args, **kwargs)` for each dataset `ds`
@@ -862,6 +877,22 @@ def apply(self, func, args=(), shortcut=None, **kwargs):
         applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped())
         return self._combine(applied)
 
+    def apply(self, func, args=(), shortcut=None, **kwargs):
+        """
+        Backward compatible implementation of ``map``
+
+        See Also
+        --------
+        DatasetGroupBy.map
+        """
+
+        warnings.warn(
+            "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.map(func, shortcut=shortcut, args=args, **kwargs)
+
     def _combine(self, applied):
         """Recombine the applied objects like the original."""
         applied_example, applied = peek_at(applied)
@@ -914,7 +945,7 @@ def reduce_dataset(ds):
 
         check_reduce_dims(dim, self.dims)
 
-        return self.apply(reduce_dataset)
+        return self.map(reduce_dataset)
 
     def assign(self, **kwargs):
         """Assign data variables by group.
@@ -923,7 +954,7 @@ def assign(self, **kwargs):
         --------
         Dataset.assign
         """
-        return self.apply(lambda ds: ds.assign(**kwargs))
+        return self.map(lambda ds: ds.assign(**kwargs))
 
 
 ops.inject_reduce_methods(DatasetGroupBy)
diff --git a/xarray/core/resample.py b/xarray/core/resample.py
index 2cb1bd55e19..fb388490d06 100644
--- a/xarray/core/resample.py
+++ b/xarray/core/resample.py
@@ -1,3 +1,5 @@
+import warnings
+
 from . import ops
 from .groupby import DataArrayGroupBy, DatasetGroupBy
 
@@ -173,8 +175,8 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
 
         super().__init__(*args, **kwargs)
 
-    def apply(self, func, shortcut=False, args=(), **kwargs):
-        """Apply a function over each array in the group and concatenate them
+    def map(self, func, shortcut=False, args=(), **kwargs):
+        """Apply a function to each array in the group and concatenate them
         together into a new array.
 
         `func` is called like `func(ar, *args, **kwargs)` for each array `ar`
@@ -212,7 +214,9 @@ def apply(self, func, shortcut=False, args=(), **kwargs):
         applied : DataArray or DataArray
             The result of splitting, applying and combining this array.
         """
-        combined = super().apply(func, shortcut=shortcut, args=args, **kwargs)
+        # TODO: the argument order for Resample doesn't match that for its parent,
+        # GroupBy
+        combined = super().map(func, shortcut=shortcut, args=args, **kwargs)
 
         # If the aggregation function didn't drop the original resampling
         # dimension, then we need to do so before we can rename the proxy
@@ -225,6 +229,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs):
 
         return combined
 
+    def apply(self, func, args=(), shortcut=None, **kwargs):
+        """
+        Backward compatible implementation of ``map``
+
+        See Also
+        --------
+        DataArrayResample.map
+        """
+        warnings.warn(
+            "Resample.apply may be deprecated in the future. Using Resample.map is encouraged",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.map(func=func, shortcut=shortcut, args=args, **kwargs)
+
 
 ops.inject_reduce_methods(DataArrayResample)
 ops.inject_binary_ops(DataArrayResample)
@@ -247,7 +266,7 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
 
         super().__init__(*args, **kwargs)
 
-    def apply(self, func, args=(), shortcut=None, **kwargs):
+    def map(self, func, args=(), shortcut=None, **kwargs):
         """Apply a function over each Dataset in the groups generated for
         resampling  and concatenate them together into a new Dataset.
 
@@ -282,6 +301,22 @@ def apply(self, func, args=(), shortcut=None, **kwargs):
 
         return combined.rename({self._resample_dim: self._dim})
 
+    def apply(self, func, args=(), shortcut=None, **kwargs):
+        """
+        Backward compatible implementation of ``map``
+
+        See Also
+        --------
+        DataSetResample.map
+        """
+
+        warnings.warn(
+            "Resample.apply may be deprecated in the future. Using Resample.map is encouraged",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.map(func=func, shortcut=shortcut, args=args, **kwargs)
+
     def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
         """Reduce the items in this group by applying `func` along the
         pre-defined resampling dimension.
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index acfe684d220..42fae2c9dd4 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -2417,7 +2417,7 @@ def test_groupby_properties(self):
             assert_array_equal(expected_groups[key], grouped.groups[key])
         assert 3 == len(grouped)
 
-    def test_groupby_apply_identity(self):
+    def test_groupby_map_identity(self):
         expected = self.make_groupby_example_array()
         idx = expected.coords["y"]
 
@@ -2428,7 +2428,7 @@ def identity(x):
             for shortcut in [False, True]:
                 for squeeze in [False, True]:
                     grouped = expected.groupby(g, squeeze=squeeze)
-                    actual = grouped.apply(identity, shortcut=shortcut)
+                    actual = grouped.map(identity, shortcut=shortcut)
                     assert_identical(expected, actual)
 
     def test_groupby_sum(self):
@@ -2461,7 +2461,7 @@ def test_groupby_sum(self):
             [["a", "b", "c"]],
             ["abc"],
         )
-        actual = array["y"].groupby("abc").apply(np.sum)
+        actual = array["y"].groupby("abc").map(np.sum)
         assert_allclose(expected, actual)
         actual = array["y"].groupby("abc").sum(...)
         assert_allclose(expected, actual)
@@ -2532,7 +2532,7 @@ def test_groupby_reduce_attrs(self):
                     expected.attrs["foo"] = "bar"
                 assert_identical(expected, actual)
 
-    def test_groupby_apply_center(self):
+    def test_groupby_map_center(self):
         def center(x):
             return x - np.mean(x)
 
@@ -2545,16 +2545,16 @@ def center(x):
         )
         expected_ds["foo"] = (["x", "y"], exp_data)
         expected_centered = expected_ds["foo"]
-        assert_allclose(expected_centered, grouped.apply(center))
+        assert_allclose(expected_centered, grouped.map(center))
 
-    def test_groupby_apply_ndarray(self):
+    def test_groupby_map_ndarray(self):
         # regression test for #326
         array = self.make_groupby_example_array()
         grouped = array.groupby("abc")
-        actual = grouped.apply(np.asarray)
+        actual = grouped.map(np.asarray)
         assert_equal(array, actual)
 
-    def test_groupby_apply_changes_metadata(self):
+    def test_groupby_map_changes_metadata(self):
         def change_metadata(x):
             x.coords["x"] = x.coords["x"] * 2
             x.attrs["fruit"] = "lemon"
@@ -2562,7 +2562,7 @@ def change_metadata(x):
 
         array = self.make_groupby_example_array()
         grouped = array.groupby("abc")
-        actual = grouped.apply(change_metadata)
+        actual = grouped.map(change_metadata)
         expected = array.copy()
         expected = change_metadata(expected)
         assert_equal(expected, actual)
@@ -2631,7 +2631,7 @@ def test_groupby_restore_dim_order(self):
             ("a", ("a", "y")),
             ("b", ("x", "b")),
         ]:
-            result = array.groupby(by).apply(lambda x: x.squeeze())
+            result = array.groupby(by).map(lambda x: x.squeeze())
             assert result.dims == expected_dims
 
     def test_groupby_restore_coord_dims(self):
@@ -2651,13 +2651,13 @@ def test_groupby_restore_coord_dims(self):
             ("a", ("a", "y")),
             ("b", ("x", "b")),
         ]:
-            result = array.groupby(by, restore_coord_dims=True).apply(
+            result = array.groupby(by, restore_coord_dims=True).map(
                 lambda x: x.squeeze()
             )["c"]
             assert result.dims == expected_dims
 
         with pytest.warns(FutureWarning):
-            array.groupby("x").apply(lambda x: x.squeeze())
+            array.groupby("x").map(lambda x: x.squeeze())
 
     def test_groupby_first_and_last(self):
         array = DataArray([1, 2, 3, 4, 5], dims="x")
@@ -2699,9 +2699,9 @@ def test_groupby_multidim(self):
             actual_sum = array.groupby(dim).sum(...)
             assert_identical(expected_sum, actual_sum)
 
-    def test_groupby_multidim_apply(self):
+    def test_groupby_multidim_map(self):
         array = self.make_groupby_multidim_example_array()
-        actual = array.groupby("lon").apply(lambda x: x - x.mean())
+        actual = array.groupby("lon").map(lambda x: x - x.mean())
         expected = DataArray(
             [[[-2.5, -6.0], [-5.0, -8.5]], [[2.5, 3.0], [8.0, 8.5]]],
             coords=array.coords,
@@ -2722,7 +2722,7 @@ def test_groupby_bins(self):
         )
         # the problem with this is that it overwrites the dimensions of array!
         # actual = array.groupby('dim_0', bins=bins).sum()
-        actual = array.groupby_bins("dim_0", bins).apply(lambda x: x.sum())
+        actual = array.groupby_bins("dim_0", bins).map(lambda x: x.sum())
         assert_identical(expected, actual)
         # make sure original array dims are unchanged
         assert len(array.dim_0) == 4
@@ -2744,12 +2744,12 @@ def test_groupby_bins_multidim(self):
         bins = [0, 15, 20]
         bin_coords = pd.cut(array["lat"].values.flat, bins).categories
         expected = DataArray([16, 40], dims="lat_bins", coords={"lat_bins": bin_coords})
-        actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum())
+        actual = array.groupby_bins("lat", bins).map(lambda x: x.sum())
         assert_identical(expected, actual)
         # modify the array coordinates to be non-monotonic after unstacking
         array["lat"].data = np.array([[10.0, 20.0], [20.0, 10.0]])
         expected = DataArray([28, 28], dims="lat_bins", coords={"lat_bins": bin_coords})
-        actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum())
+        actual = array.groupby_bins("lat", bins).map(lambda x: x.sum())
         assert_identical(expected, actual)
 
     def test_groupby_bins_sort(self):
@@ -2784,7 +2784,7 @@ def func(arg1, arg2, arg3=0.0):
         times = pd.date_range("2000", periods=3, freq="D")
         da = xr.DataArray([1.0, 1.0, 1.0], coords=[times], dims=["time"])
         expected = xr.DataArray([3.0, 3.0, 3.0], coords=[times], dims=["time"])
-        actual = da.resample(time="D").apply(func, args=(1.0,), arg3=1.0)
+        actual = da.resample(time="D").map(func, args=(1.0,), arg3=1.0)
         assert_identical(actual, expected)
 
     def test_resample_first(self):
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 50e78c9f685..d001c43da94 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3310,17 +3310,17 @@ def identity(x):
             return x
 
         for k in ["x", "c", "y"]:
-            actual = data.groupby(k, squeeze=False).apply(identity)
+            actual = data.groupby(k, squeeze=False).map(identity)
             assert_equal(data, actual)
 
     def test_groupby_returns_new_type(self):
         data = Dataset({"z": (["x", "y"], np.random.randn(3, 5))})
 
-        actual = data.groupby("x").apply(lambda ds: ds["z"])
+        actual = data.groupby("x").map(lambda ds: ds["z"])
         expected = data["z"]
         assert_identical(expected, actual)
 
-        actual = data["z"].groupby("x").apply(lambda x: x.to_dataset())
+        actual = data["z"].groupby("x").map(lambda x: x.to_dataset())
         expected = data
         assert_identical(expected, actual)
 
@@ -3639,7 +3639,7 @@ def func(arg1, arg2, arg3=0.0):
         times = pd.date_range("2000", freq="D", periods=3)
         ds = xr.Dataset({"foo": ("time", [1.0, 1.0, 1.0]), "time": times})
         expected = xr.Dataset({"foo": ("time", [3.0, 3.0, 3.0]), "time": times})
-        actual = ds.resample(time="D").apply(func, args=(1.0,), arg3=1.0)
+        actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0)
         assert_identical(expected, actual)
 
     def test_to_array(self):
@@ -4515,31 +4515,36 @@ def test_count(self):
         actual = ds.count()
         assert_identical(expected, actual)
 
-    def test_apply(self):
+    def test_map(self):
         data = create_test_data()
         data.attrs["foo"] = "bar"
 
-        assert_identical(data.apply(np.mean), data.mean())
+        assert_identical(data.map(np.mean), data.mean())
 
         expected = data.mean(keep_attrs=True)
-        actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True)
+        actual = data.map(lambda x: x.mean(keep_attrs=True), keep_attrs=True)
         assert_identical(expected, actual)
 
-        assert_identical(
-            data.apply(lambda x: x, keep_attrs=True), data.drop_vars("time")
-        )
+        assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop_vars("time"))
 
         def scale(x, multiple=1):
             return multiple * x
 
-        actual = data.apply(scale, multiple=2)
+        actual = data.map(scale, multiple=2)
         assert_equal(actual["var1"], 2 * data["var1"])
         assert_identical(actual["numbers"], data["numbers"])
 
-        actual = data.apply(np.asarray)
+        actual = data.map(np.asarray)
         expected = data.drop_vars("time")  # time is not used on a data var
         assert_equal(expected, actual)
 
+    def test_apply_pending_deprecated_map(self):
+        data = create_test_data()
+        data.attrs["foo"] = "bar"
+
+        with pytest.warns(PendingDeprecationWarning):
+            assert_identical(data.apply(np.mean), data.mean())
+
     def make_example_math_dataset(self):
         variables = {
             "bar": ("x", np.arange(100, 400, 100)),
@@ -4566,15 +4571,15 @@ def test_dataset_number_math(self):
     def test_unary_ops(self):
         ds = self.make_example_math_dataset()
 
-        assert_identical(ds.apply(abs), abs(ds))
-        assert_identical(ds.apply(lambda x: x + 4), ds + 4)
+        assert_identical(ds.map(abs), abs(ds))
+        assert_identical(ds.map(lambda x: x + 4), ds + 4)
 
         for func in [
             lambda x: x.isnull(),
             lambda x: x.round(),
             lambda x: x.astype(int),
         ]:
-            assert_identical(ds.apply(func), func(ds))
+            assert_identical(ds.map(func), func(ds))
 
         assert_identical(ds.isnull(), ~ds.notnull())
 
@@ -4587,7 +4592,7 @@ def test_unary_ops(self):
     def test_dataset_array_math(self):
         ds = self.make_example_math_dataset()
 
-        expected = ds.apply(lambda x: x - ds["foo"])
+        expected = ds.map(lambda x: x - ds["foo"])
         assert_identical(expected, ds - ds["foo"])
         assert_identical(expected, -ds["foo"] + ds)
         assert_identical(expected, ds - ds["foo"].variable)
@@ -4596,7 +4601,7 @@ def test_dataset_array_math(self):
         actual -= ds["foo"]
         assert_identical(expected, actual)
 
-        expected = ds.apply(lambda x: x + ds["bar"])
+        expected = ds.map(lambda x: x + ds["bar"])
         assert_identical(expected, ds + ds["bar"])
         actual = ds.copy(deep=True)
         actual += ds["bar"]
@@ -4612,7 +4617,7 @@ def test_dataset_dataset_math(self):
         assert_identical(ds, ds + 0 * ds)
         assert_identical(ds, ds + {"foo": 0, "bar": 0})
 
-        expected = ds.apply(lambda x: 2 * x)
+        expected = ds.map(lambda x: 2 * x)
         assert_identical(expected, 2 * ds)
         assert_identical(expected, ds + ds)
         assert_identical(expected, ds + ds.data_vars)
@@ -4709,7 +4714,7 @@ def test_dataset_transpose(self):
         assert_identical(expected, actual)
 
         actual = ds.transpose("x", "y")
-        expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True))
+        expected = ds.map(lambda x: x.transpose("x", "y", transpose_coords=True))
         assert_identical(expected, actual)
 
         ds = create_test_data()
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index e2216547ac8..581affa3471 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -45,14 +45,14 @@ def test_groupby_dims_property(dataset):
     assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims
 
 
-def test_multi_index_groupby_apply(dataset):
+def test_multi_index_groupby_map(dataset):
     # regression test for GH873
     ds = dataset.isel(z=1, drop=True)[["foo"]]
     expected = 2 * ds
     actual = (
         ds.stack(space=["x", "y"])
         .groupby("space")
-        .apply(lambda x: 2 * x)
+        .map(lambda x: 2 * x)
         .unstack("space")
     )
     assert_equal(expected, actual)
@@ -107,23 +107,23 @@ def test_groupby_input_mutation():
     assert_identical(array, array_copy)  # should not modify inputs
 
 
-def test_da_groupby_apply_func_args():
+def test_da_groupby_map_func_args():
     def func(arg1, arg2, arg3=0):
         return arg1 + arg2 + arg3
 
     array = xr.DataArray([1, 1, 1], [("x", [1, 2, 3])])
     expected = xr.DataArray([3, 3, 3], [("x", [1, 2, 3])])
-    actual = array.groupby("x").apply(func, args=(1,), arg3=1)
+    actual = array.groupby("x").map(func, args=(1,), arg3=1)
     assert_identical(expected, actual)
 
 
-def test_ds_groupby_apply_func_args():
+def test_ds_groupby_map_func_args():
     def func(arg1, arg2, arg3=0):
         return arg1 + arg2 + arg3
 
     dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]})
     expected = xr.Dataset({"foo": ("x", [3, 3, 3])}, {"x": [1, 2, 3]})
-    actual = dataset.groupby("x").apply(func, args=(1,), arg3=1)
+    actual = dataset.groupby("x").map(func, args=(1,), arg3=1)
     assert_identical(expected, actual)
 
 
@@ -285,7 +285,7 @@ def test_groupby_drops_nans():
     expected.variable.values[0, 0, :] = np.nan
     expected.variable.values[-1, -1, :] = np.nan
     expected.variable.values[3, 0, :] = np.nan
-    actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims)
+    actual = grouped.map(lambda x: x).transpose(*ds.variable.dims)
     assert_identical(actual, expected)
 
     # reduction along grouped dimension
diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
index 8e2d4b8e064..a31da162487 100644
--- a/xarray/tests/test_sparse.py
+++ b/xarray/tests/test_sparse.py
@@ -339,7 +339,7 @@ def test_dataarray_property(prop):
         (do("copy"), True),
         (do("count"), False),
         (do("diff", "x"), True),
-        (do("drop", "x"), True),
+        (do("drop_vars", "x"), True),
         (do("expand_dims", {"z": 2}, axis=2), True),
         (do("get_axis_num", "x"), False),
         (do("get_index", "x"), False),