diff --git a/README.rst b/README.rst index 53f51392a1a..5ee7234f221 100644 --- a/README.rst +++ b/README.rst @@ -138,4 +138,7 @@ under a "3-clause BSD" license: xarray also bundles portions of CPython, which is available under the "Python Software Foundation License" in xarray/core/pycompat.py. +xarray uses icons from the icomoon package (free version), which is +available under the "CC BY 4.0" license. + The full text of these licenses are included in the licenses directory. diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index c99ae39e5d9..3f10a158f91 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -13,7 +13,7 @@ dependencies: - cartopy=0.17 - cdms2=3.1 - cfgrib=0.9 - - cftime=1.0.3 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime=1.0 - coveralls - dask=1.2 - distributed=1.27 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index 6e27cea2ffe..f9847ef6da5 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cdms2 - cfgrib - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 7027fc11ab7..111cd96c30c 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -9,7 +9,7 @@ dependencies: - cartopy # - cdms2 # Not available on Windows # - cfgrib # Causes Python interpreter crash on Windows - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index a4c974c0176..d816019dd65 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cdms2 - cfgrib - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/doc/data-structures.rst b/doc/data-structures.rst index f7b34036a03..d5567f4863e 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -411,7 +411,7 @@ Any variables using that dimension are dropped: As an alternate to dictionary-like modifications, you can use :py:meth:`~xarray.Dataset.assign` and :py:meth:`~xarray.Dataset.assign_coords`. -These methods return a new dataset with additional (or replaced) or values: +These methods return a new dataset with additional (or replaced) values: .. ipython:: python @@ -420,7 +420,7 @@ These methods return a new dataset with additional (or replaced) or values: There is also the :py:meth:`~xarray.Dataset.pipe` method that allows you to use a method call with an external function (e.g., ``ds.pipe(func)``) instead of simply calling it (e.g., ``func(ds)``). This allows you to write pipelines for -transforming you data (using "method chaining") instead of writing hard to +transforming your data (using "method chaining") instead of writing hard to follow nested function calls: .. ipython:: python diff --git a/doc/examples/monthly-means.rst b/doc/examples/monthly-means.rst index 7cc47eb2847..7d620f1bca3 100644 --- a/doc/examples/monthly-means.rst +++ b/doc/examples/monthly-means.rst @@ -83,7 +83,7 @@ the ``calendar.month_range`` function. 
for i, (month, year) in enumerate(zip(time.month, time.year)): month_length[i] = cal_days[month] - if leap_year(year, calendar=calendar): + if leap_year(year, calendar=calendar) and month == 2: month_length[i] += 1 return month_length diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 51202f9be41..455a24f9216 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -18,12 +18,14 @@ Reordering dimensions --------------------- To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables -on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`: +on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An +ellipsis (`...`) can be used to represent all other dimensions: .. ipython:: python ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) ds.transpose('y', 'z', 'x') + ds.transpose(..., 'x') # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions diff --git a/doc/terminology.rst b/doc/terminology.rst index 4ee56190d5f..d1265e4da9d 100644 --- a/doc/terminology.rst +++ b/doc/terminology.rst @@ -15,7 +15,7 @@ Terminology ---- -**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. +**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. .. note:: @@ -39,4 +39,4 @@ Terminology ---- -**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` \ No newline at end of file +**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1c6d72e599d..62d6006838b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,60 +21,96 @@ v0.14.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ -- Minimum cftime version is now 1.0.3. By `Deepak Cherian `_. +- Broken compatibility with cftime < 1.0.3. + By `Deepak Cherian `_. + + ..
note:: + + cftime version 1.0.4 is broken + (`cftime/126 `_); + please use version 1.0.4.2 instead. + +- All leftover support for dates from non-standard calendars through netcdftime, the + module included in versions of netCDF4 prior to 1.4 that eventually became the + cftime package, has been removed in favor of relying solely on the standalone + cftime package (:pull:`3450`). + By `Spencer Clark `_. New Features ~~~~~~~~~~~~ - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. +- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) + to represent all 'other' dimensions. For example, to move one dimension to the front, + use `.transpose('x', ...)`. (:pull:`3421`) + By `Maximilian Roos `_ - Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use `...` directly. As before, you can use this to instruct a `groupby` operation to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest - using `...`. + using `...`. (:pull:`3418`) By `Maximilian Roos `_ -- Added integration tests against `pint `_. - (:pull:`3238`) by `Justus Magin `_. - - .. note:: - - At the moment of writing, these tests *as well as the ability to use pint in general* - require `a highly experimental version of pint - `_ (install with - ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. - Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. - +- :py:func:`~xarray.dot`, and :py:func:`~xarray.DataArray.dot` now support the + `dims=...` option to sum over the union of dimensions of all input arrays + (:issue:`3423`) by `Mathias Hauser `_. - Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve representation of objects in jupyter. By default this feature is turned off for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. (:pull:`3425`) by `Benoit Bovy `_ and `Julia Signell `_. +- Implement `dask deterministic hashing + `_ + for xarray objects. Note that xarray objects with a dask.array backend already used + deterministic hashing in previous releases; this change implements it when whole + xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is + invoked. (:issue:`3378`, :pull:`3446`) + By `Deepak Cherian `_ and + `Guido Imperiale `_. Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - -- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. +- Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). + By `Deepak Cherian `_. +- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. - +- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and + :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. + (:issue:`3402`). By `Deepak Cherian `_ Documentation ~~~~~~~~~~~~~ - +- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) by `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` and explicitly state that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin `_. 
- Update the terminology page to address multidimensional coordinates. (:pull:`3410`) By `Jon Thielen `_. +- Fix the documentation of :py:meth:`Dataset.integrate` and + :py:meth:`DataArray.integrate` and add an example to + :py:meth:`Dataset.integrate`. (:pull:`3469`) + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ +- Added integration tests against `pint `_. + (:pull:`3238`) by `Justus Magin `_. + + .. note:: + + At the moment of writing, these tests *as well as the ability to use pint in general* + require `a highly experimental version of pint + `_ (install with + ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. + Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. + - Use Python 3.6 idioms throughout the codebase. (:pull:3419) By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) diff --git a/licenses/ICOMOON_LICENSE b/licenses/ICOMOON_LICENSE new file mode 100644 index 00000000000..4ea99c213c5 --- /dev/null +++ b/licenses/ICOMOON_LICENSE @@ -0,0 +1,395 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. 
Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. 
Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. 
Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. 
No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/properties/conftest.py b/properties/conftest.py new file mode 100644 index 00000000000..0a66d92ebc6 --- /dev/null +++ b/properties/conftest.py @@ -0,0 +1,8 @@ +try: + from hypothesis import settings +except ImportError: + pass +else: + # Run for a while - arrays are a bigger search space than usual + settings.register_profile("ci", deadline=None, print_blob=True) + settings.load_profile("ci") diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index 011e7a922d1..221083e16a1 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -10,15 +10,10 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given, settings +from hypothesis import given import xarray as xr -# Run for a while - arrays are a bigger search space than usual -settings.register_profile("ci", deadline=None) -settings.load_profile("ci") - - an_array = npst.arrays( dtype=st.one_of( npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py new file mode 100644 index 00000000000..a8005d319d6 --- /dev/null +++ b/properties/test_pandas_roundtrip.py @@ -0,0 +1,97 @@ +""" +Property-based tests for roundtripping between xarray and pandas objects. 
+""" +import pytest + +pytest.importorskip("hypothesis") + +from functools import partial +import hypothesis.extra.numpy as npst +import hypothesis.extra.pandas as pdst +import hypothesis.strategies as st +from hypothesis import given + +import numpy as np +import pandas as pd +import xarray as xr + +numeric_dtypes = st.one_of( + npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() +) + +numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt)) + +an_array = npst.arrays( + dtype=numeric_dtypes, + shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas +) + + +@st.composite +def datasets_1d_vars(draw): + """Generate datasets with only 1D variables + + Suitable for converting to pandas dataframes. + """ + # Generate an index for the dataset + idx = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100)) + + # Generate 1-3 variables, 1D with the same length as the index + vars_strategy = st.dictionaries( + keys=st.text(), + values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)).map( + partial(xr.Variable, ("rows",)) + ), + min_size=1, + max_size=3, + ) + return xr.Dataset(draw(vars_strategy), coords={"rows": idx}) + + +@given(st.data(), an_array) +def test_roundtrip_dataarray(data, arr): + names = data.draw( + st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map( + tuple + ) + ) + coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)} + original = xr.DataArray(arr, dims=names, coords=coords) + roundtripped = xr.DataArray(original.to_pandas()) + xr.testing.assert_identical(original, roundtripped) + + +@given(datasets_1d_vars()) +def test_roundtrip_dataset(dataset): + df = dataset.to_dataframe() + assert isinstance(df, pd.DataFrame) + roundtripped = xr.Dataset(df) + xr.testing.assert_identical(dataset, roundtripped) + + +@given(numeric_series, st.text()) +def test_roundtrip_pandas_series(ser, ix_name): + # Need to name the index, otherwise Xarray calls it 'dim_0'. + ser.index.name = ix_name + arr = xr.DataArray(ser) + roundtripped = arr.to_pandas() + pd.testing.assert_series_equal(ser, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) + + +# Dataframes with columns of all the same dtype - for roundtrip to DataArray +numeric_homogeneous_dataframe = numeric_dtypes.flatmap( + lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)) +) + + +@pytest.mark.xfail +@given(numeric_homogeneous_dataframe) +def test_roundtrip_pandas_dataframe(df): + # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. + df.index.name = "rows" + df.columns.name = "cols" + arr = xr.DataArray(df) + roundtripped = arr.to_pandas() + pd.testing.assert_frame_equal(df, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) diff --git a/setup.cfg b/setup.cfg index eee8b2477b2..fec2ca6bbe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -117,4 +117,7 @@ tag_prefix = v parentdir_prefix = xarray- [aliases] -test = pytest \ No newline at end of file +test = pytest + +[pytest-watch] +nobeep = True \ No newline at end of file diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 199516116b0..d23594fc675 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -677,7 +677,7 @@ def open_dataarray( "then select the variable you want." 
) else: - data_array, = dataset.data_vars.values() + (data_array,) = dataset.data_vars.values() data_array._file_obj = dataset._file_obj diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 0174088064b..965ddd8f043 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -39,34 +39,6 @@ ) -def _import_cftime(): - """ - helper function handle the transition to netcdftime/cftime - as a stand-alone package - """ - try: - import cftime - except ImportError: - # in netCDF4 the num2date/date2num function are top-level api - try: - import netCDF4 as cftime - except ImportError: - raise ImportError("Failed to import cftime") - return cftime - - -def _require_standalone_cftime(): - """Raises an ImportError if the standalone cftime is not found""" - try: - import cftime # noqa: F401 - except ImportError: - raise ImportError( - "Decoding times with non-standard calendars " - "or outside the pandas.Timestamp-valid range " - "requires the standalone cftime package." - ) - - def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith("s"): @@ -119,16 +91,11 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): def _decode_datetime_with_cftime(num_dates, units, calendar): - cftime = _import_cftime() + import cftime - if cftime.__name__ == "cftime": - return np.asarray( - cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) - ) - else: - # Must be using num2date from an old version of netCDF4 which - # does not have the only_use_cftime_datetimes option. - return np.asarray(cftime.num2date(num_dates, units, calendar)) + return np.asarray( + cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) + ) def _decode_datetime_with_pandas(flat_num_dates, units, calendar): @@ -354,7 +321,7 @@ def _encode_datetime_with_cftime(dates, units, calendar): This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. 
""" - cftime = _import_cftime() + import cftime if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 1a33cb955c3..41ff5a3b32d 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -252,7 +252,7 @@ def align( if not indexes and len(objects) == 1: # fast path for the trivial case - obj, = objects + (obj,) = objects return (obj.copy(deep=copy),) all_indexes = defaultdict(list) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 19c327ec597..3308dcef285 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -954,7 +954,7 @@ def _auto_concat( "supply the ``concat_dim`` argument " "explicitly" ) - dim, = concat_dims + (dim,) = concat_dims return concat( datasets, dim=dim, diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1393d76f283..bb5ab07d8dd 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -145,7 +145,7 @@ def result_name(objects: list) -> Any: names = {getattr(obj, "name", _DEFAULT_NAME) for obj in objects} names.discard(_DEFAULT_NAME) if len(names) == 1: - name, = names + (name,) = names else: name = None return name @@ -187,7 +187,7 @@ def build_output_coords( if len(coords_list) == 1 and not exclude_dims: # we can skip the expensive merge - unpacked_coords, = coords_list + (unpacked_coords,) = coords_list merged_vars = dict(unpacked_coords.variables) else: # TODO: save these merged indexes, instead of re-computing them later @@ -237,7 +237,7 @@ def apply_dataarray_vfunc( for variable, coords in zip(result_var, result_coords) ) else: - coords, = result_coords + (coords,) = result_coords out = DataArray(result_var, coords, name=name, fastpath=True) return out @@ -384,7 +384,7 @@ def apply_dataset_vfunc( if signature.num_outputs > 1: out = tuple(_fast_dataset(*args) for args in zip(result_vars, list_of_coords)) else: - coord_vars, = list_of_coords + (coord_vars,) = list_of_coords out = _fast_dataset(result_vars, coord_vars) if keep_attrs and isinstance(first_obj, Dataset): @@ -884,7 +884,7 @@ def apply_ufunc( Plain scalars, numpy arrays and a mix of these with xarray objects is also supported: - >>> magnitude(4, 5) + >>> magnitude(3, 4) 5.0 >>> magnitude(3, np.array([0, 4])) array([3., 5.]) @@ -1055,9 +1055,9 @@ def dot(*arrays, dims=None, **kwargs): ---------- arrays: DataArray (or Variable) objects Arrays to compute. - dims: str or tuple of strings, optional - Which dimensions to sum over. - If not speciified, then all the common dimensions are summed over. + dims: '...', str or tuple of strings, optional + Which dimensions to sum over. Ellipsis ('...') sums over all dimensions. + If not specified, then all the common dimensions are summed over. **kwargs: dict Additional keyword arguments passed to numpy.einsum or dask.array.einsum @@ -1070,7 +1070,7 @@ def dot(*arrays, dims=None, **kwargs): -------- >>> import numpy as np - >>> import xarray as xp + >>> import xarray as xr >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b']) >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), ... dims=['a', 'b', 'c']) @@ -1117,6 +1117,14 @@ def dot(*arrays, dims=None, **kwargs): [273, 446, 619]]) Dimensions without coordinates: a, d + >>> xr.dot(da_a, da_b) + + array([110, 125]) + Dimensions without coordinates: c + + >>> xr.dot(da_a, da_b, dims=...) 
+ + array(235) """ from .dataarray import DataArray from .variable import Variable @@ -1141,7 +1149,9 @@ def dot(*arrays, dims=None, **kwargs): einsum_axes = "abcdefghijklmnopqrstuvwxyz" dim_map = {d: einsum_axes[i] for i, d in enumerate(all_dims)} - if dims is None: + if dims is ...: + dims = all_dims + elif dims is None: # find dimensions that occur more than one times dim_counts = Counter() for arr in arrays: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index bcab136de8d..0d19990bdd0 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -148,10 +148,10 @@ def _calc_concat_dim_coord(dim): dim = dim_name elif not isinstance(dim, DataArray): coord = as_variable(dim).to_index_variable() - dim, = coord.dims + (dim,) = coord.dims else: coord = dim - dim, = coord.dims + (dim,) = coord.dims return dim, coord diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 206c9172429..8d1f1d878e8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes from .options import OPTIONS -from .utils import ReprObject, _check_inplace, either_dict_or_kwargs +from .utils import Default, ReprObject, _default, _check_inplace, either_dict_or_kwargs from .variable import ( IndexVariable, Variable, @@ -270,8 +270,6 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - __default = ReprObject("") - dt = property(DatetimeAccessor) def __init__( @@ -387,18 +385,18 @@ def _replace( self, variable: Variable = None, coords=None, - name: Optional[Hashable] = __default, + name: Union[Hashable, None, Default] = _default, ) -> "DataArray": if variable is None: variable = self.variable if coords is None: coords = self._coords - if name is self.__default: + if name is _default: name = self.name return type(self)(variable, coords, name=name, fastpath=True) def _replace_maybe_drop_dims( - self, variable: Variable, name: Optional[Hashable] = __default + self, variable: Variable, name: Union[Hashable, None, Default] = _default ) -> "DataArray": if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() @@ -438,7 +436,7 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Hashable = __default + self, dataset: Dataset, name: Hashable = _default ) -> "DataArray": variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables @@ -616,7 +614,7 @@ def _level_coords(self) -> Dict[Hashable, Hashable]: if var.ndim == 1 and isinstance(var, IndexVariable): level_names = var.level_names if level_names is not None: - dim, = var.dims + (dim,) = var.dims level_coords.update({lname: dim for lname in level_names}) return level_coords @@ -754,6 +752,9 @@ def reset_coords( dataset[self.name] = self.variable return dataset + def __dask_tokenize__(self): + return (type(self), self._variable, self._coords, self._name) + def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() @@ -1863,12 +1864,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra Dataset.transpose """ if dims: - if set(dims) ^ set(self.dims): - raise ValueError( - "arguments to transpose (%s) must be " - "permuted array dimensions (%s)" % (dims, tuple(self.dims)) - ) - + dims = tuple(utils.infix_dims(dims, self.dims)) variable = 
self.variable.transpose(*dims) if transpose_coords: coords: Dict[Hashable, Variable] = {} @@ -2481,13 +2477,11 @@ def identical(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - __default_name = object() - def _result_name(self, other: Any = None) -> Optional[Hashable]: # use the same naming heuristics as pandas: # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356 - other_name = getattr(other, "name", self.__default_name) - if other_name is self.__default_name or other_name == self.name: + other_name = getattr(other, "name", _default) + if other_name is _default or other_name == self.name: return self.name else: return None @@ -2773,9 +2767,9 @@ def dot( ---------- other : DataArray The other array with which the dot product is performed. - dims: hashable or sequence of hashables, optional - Along which dimensions to be summed over. Default all the common - dimensions are summed over. + dims: '...', hashable or sequence of hashables, optional + Which dimensions to sum over. Ellipsis ('...') sums over all dimensions. + If not specified, then all the common dimensions are summed over. Returns ------- @@ -3026,7 +3020,7 @@ def integrate( """ integrate the array with the trapezoidal rule. .. note:: - This feature is limited to simple cartesian geometry, i.e. coord + This feature is limited to simple cartesian geometry, i.e. dim must be one dimensional. Parameters diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1741207536f..16229946cac 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -70,8 +70,10 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .utils import ( + Default, Frozen, SortedKeysDict, + _default, _check_inplace, decode_numpy_dict_values, either_dict_or_kwargs, @@ -649,6 +651,9 @@ def load(self, **kwargs) -> "Dataset": return self + def __dask_tokenize__(self): + return (type(self), self._variables, self._coord_names, self._attrs) + def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} graphs = {k: v for k, v in graphs.items() if v is not None} @@ -856,23 +861,18 @@ def _construct_direct( obj._accessors = None return obj - __default = object() - @classmethod def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None): return cls._construct_direct(variables, coord_names, attrs=attrs) - # TODO(shoyer): renable type checking on this signature when pytype has a - # good way to handle defaulting arguments to a sentinel value: - # https://github.com/python/mypy/issues/1803 - def _replace( # type: ignore + def _replace( self, variables: Dict[Hashable, Variable] = None, coord_names: Set[Hashable] = None, dims: Dict[Any, int] = None, - attrs: Optional[Dict[Hashable, Any]] = __default, - indexes: Optional[Dict[Any, pd.Index]] = __default, - encoding: Optional[dict] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, + indexes: Union[Dict[Any, pd.Index], None, Default] = _default, + encoding: Union[dict, None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Fastpath constructor for internal use. 
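A minimal sketch of the user-facing behaviour the ``infix_dims``-based ``transpose`` change above enables (illustrative only, not part of the patch; the array and dimension names are made up):

.. code-block:: python

    import xarray as xr

    da = xr.DataArray([[[42]]], dims=("x", "y", "z"))

    # full explicit permutation, as before
    da.transpose("y", "z", "x").dims  # ('y', 'z', 'x')

    # new: '...' expands to all remaining dims, here moving 'x' to the end
    da.transpose(..., "x").dims       # ('y', 'z', 'x')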
@@ -890,12 +890,12 @@ def _replace( # type: ignore self._coord_names = coord_names if dims is not None: self._dims = dims - if attrs is not self.__default: - self._attrs = attrs - if indexes is not self.__default: - self._indexes = indexes - if encoding is not self.__default: - self._encoding = encoding + if attrs is not _default: + self._attrs = attrs # type: ignore # FIXME need mypy 0.750 + if indexes is not _default: + self._indexes = indexes # type: ignore # FIXME need mypy 0.750 + if encoding is not _default: + self._encoding = encoding # type: ignore # FIXME need mypy 0.750 obj = self else: if variables is None: @@ -904,23 +904,23 @@ def _replace( # type: ignore coord_names = self._coord_names.copy() if dims is None: dims = self._dims.copy() - if attrs is self.__default: + if attrs is _default: attrs = copy.copy(self._attrs) - if indexes is self.__default: + if indexes is _default: indexes = copy.copy(self._indexes) - if encoding is self.__default: + if encoding is _default: encoding = copy.copy(self._encoding) obj = self._construct_direct( variables, coord_names, dims, attrs, indexes, encoding ) return obj - def _replace_with_new_dims( # type: ignore + def _replace_with_new_dims( self, variables: Dict[Hashable, Variable], coord_names: set = None, - attrs: Optional[Dict[Hashable, Any]] = __default, - indexes: Dict[Hashable, pd.Index] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, + indexes: Union[Dict[Hashable, pd.Index], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Replace variables with recalculated dimensions.""" @@ -929,12 +929,12 @@ def _replace_with_new_dims( # type: ignore variables, coord_names, dims, attrs, indexes, inplace=inplace ) - def _replace_vars_and_dims( # type: ignore + def _replace_vars_and_dims( self, variables: Dict[Hashable, Variable], coord_names: set = None, dims: Dict[Hashable, int] = None, - attrs: Dict[Hashable, Any] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Deprecated version of _replace_with_new_dims(). @@ -3542,7 +3542,6 @@ def drop( # noqa: F811 ---------- labels : hashable or iterable of hashables Name(s) of variables or index labels to drop. - If dim is not None, labels can be any array-like. dim : None or hashable, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops variables rather than index labels. @@ -3712,14 +3711,14 @@ def transpose(self, *dims: Hashable) -> "Dataset": DataArray.transpose """ if dims: - if set(dims) ^ set(self.dims): + if set(dims) ^ set(self.dims) and ... 
not in dims: raise ValueError( "arguments to transpose (%s) must be " "permuted dataset dimensions (%s)" % (dims, tuple(self.dims)) ) ds = self.copy() for name, var in self._variables.items(): - var_dims = tuple(dim for dim in dims if dim in var.dims) + var_dims = tuple(dim for dim in dims if dim in (var.dims + (...,))) ds._variables[name] = var.transpose(*var_dims) return ds @@ -4091,7 +4090,7 @@ def reduce( if len(reduce_dims) == 1: # unpack dimensions for the benefit of functions # like np.argmin which can't handle tuple arguments - reduce_dims, = reduce_dims + (reduce_dims,) = reduce_dims elif len(reduce_dims) == var.ndim: # prefer to aggregate over axis=None rather than # axis=(0, 1) if they will be equivalent, because @@ -5190,7 +5189,7 @@ def integrate(self, coord, datetime_unit=None): Parameters ---------- - dim: str, or a sequence of str + coord: str, or a sequence of str Coordinate(s) used for the integration. datetime_unit Can be specify the unit if datetime coordinate is used. One of @@ -5205,6 +5204,34 @@ def integrate(self, coord, datetime_unit=None): -------- DataArray.integrate numpy.trapz: corresponding numpy function + + Examples + -------- + >>> ds = xr.Dataset( + ... data_vars={"a": ("x", [5, 5, 6, 6]), "b": ("x", [1, 2, 1, 0])}, + ... coords={"x": [0, 1, 2, 3], "y": ("x", [1, 7, 3, 5])}, + ... ) + >>> ds + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + y (x) int64 1 7 3 5 + Data variables: + a (x) int64 5 5 6 6 + b (x) int64 1 2 1 0 + >>> ds.integrate("x") + + Dimensions: () + Data variables: + a float64 16.5 + b float64 3.5 + >>> ds.integrate("y") + + Dimensions: () + Data variables: + a float64 20.0 + b float64 4.0 """ if not isinstance(coord, (list, tuple)): coord = (coord,) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 68bd28ddb12..353566eb345 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -15,6 +15,7 @@ from .utils import ( either_dict_or_kwargs, hashable, + is_scalar, maybe_wrap_array, peek_at, safe_cast_to_index, @@ -22,6 +23,18 @@ from .variable import IndexVariable, Variable, as_variable +def check_reduce_dims(reduce_dims, dimensions): + + if reduce_dims is not ...: + if is_scalar(reduce_dims): + reduce_dims = [reduce_dims] + if any([dim not in dimensions for dim in reduce_dims]): + raise ValueError( + "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." + % (reduce_dims, dimensions) + ) + + def unique_value_groups(ar, sort=True): """Group an array by its unique values. @@ -308,7 +321,7 @@ def __init__( raise ValueError("`group` must have a name") group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) - group_dim, = group.dims + (group_dim,) = group.dims expected_size = obj.sizes[group_dim] if group.size != expected_size: @@ -348,6 +361,13 @@ def __init__( group_indices = [slice(i, i + 1) for i in group_indices] unique_coord = group else: + if group.isnull().any(): + # drop any NaN valued groups. + # also drop obj values where group was NaN + # Use where instead of reindex to account for duplicate coordinate labels. 
+ obj = obj.where(group.notnull(), drop=True) + group = group.dropna(group_dim) + # look through group to find the unique values unique_values, group_indices = unique_value_groups( safe_cast_to_index(group), sort=(bins is None) @@ -450,7 +470,7 @@ def _infer_concat_args(self, applied_example): else: coord = self._unique_coord positions = None - dim, = coord.dims + (dim,) = coord.dims if isinstance(coord, _DummyGroup): coord = None return coord, dim, positions @@ -624,7 +644,7 @@ def _concat_shortcut(self, applied, dim, positions=None): def _restore_dim_order(self, stacked): def lookup_order(dimension): if dimension == self._group.name: - dimension, = self._group.dims + (dimension,) = self._group.dims if dimension in self._obj.dims: axis = self._obj.get_axis_num(dimension) else: @@ -794,15 +814,11 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not ... and dim not in self.dims: - raise ValueError( - "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (dim, self.dims) - ) - def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + check_reduce_dims(dim, self.dims) + return self.apply(reduce_array, shortcut=shortcut) @@ -895,11 +911,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim is not ... and dim not in self.dims: - raise ValueError( - "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (dim, self.dims) - ) + check_reduce_dims(dim, self.dims) return self.apply(reduce_dataset) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b9809a8d2b9..f48c9e72af1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -212,7 +212,7 @@ def get_dim_indexers(data_obj, indexers): level_indexers = defaultdict(dict) dim_indexers = {} for key, label in indexers.items(): - dim, = data_obj[key].dims + (dim,) = data_obj[key].dims if key != dim: # assume here multi-index level indexer level_indexers[dim][key] = label @@ -1368,7 +1368,7 @@ def __getitem__( if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - key, = key + (key,) = key if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional return NumpyIndexingAdapter(self.array.values)[indexer] diff --git a/xarray/core/merge.py b/xarray/core/merge.py index db5ef9531df..389ceb155f7 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -277,7 +277,7 @@ def append_all(variables, indexes): def collect_from_coordinates( - list_of_coords: "List[Coordinates]" + list_of_coords: "List[Coordinates]", ) -> Dict[Hashable, List[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {} @@ -320,7 +320,7 @@ def merge_coordinates_without_align( def determine_coords( - list_of_mappings: Iterable["DatasetLike"] + list_of_mappings: Iterable["DatasetLike"], ) -> Tuple[Set[Hashable], Set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. 
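A minimal sketch of the groupby behaviour the NaN-handling change above implements (illustrative only, not part of the patch; names are made up): elements whose group label is NaN are now dropped, together with the corresponding values, rather than forming a NaN group.

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        [1.0, 2.0, 3.0, 4.0],
        dims="x",
        coords={"label": ("x", [0.0, np.nan, 0.0, 1.0])},
    )

    # the NaN-labelled element (2.0) is excluded from the result:
    # label=0.0 -> 4.0, label=1.0 -> 4.0
    da.groupby("label").sum()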
diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 6befe0b5efc..6681375c18e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -6,10 +6,12 @@ import os.path import re import warnings +from enum import Enum from typing import ( AbstractSet, Any, Callable, + Collection, Container, Dict, Hashable, @@ -660,6 +662,30 @@ def __len__(self) -> int: return len(self._data) - num_hidden +def infix_dims(dims_supplied: Collection, dims_all: Collection) -> Iterator: + """ + Resolves a supplied list containing an ellipsis representing other items, to + a generator with the 'realized' list of all items + """ + if ... in dims_supplied: + if len(set(dims_all)) != len(dims_all): + raise ValueError("Cannot use ellipsis with repeated dims") + if len([d for d in dims_supplied if d == ...]) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = [d for d in dims_all if d not in dims_supplied] + for d in dims_supplied: + if d == ...: + yield from other_dims + else: + yield d + else: + if set(dims_supplied) ^ set(dims_all): + raise ValueError( + f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" + ) + yield from dims_supplied + + def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: """ Get an new dimension name based on new_dim, that is not used in dims. If the same name exists, we add an underscore(s) in the head. @@ -676,3 +702,11 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: while new_dim in dims: new_dim = "_" + str(new_dim) return new_dim + + +# Singleton type, as per https://github.com/python/typing/pull/240 +class Default(Enum): + token = 0 + + +_default = Default.token diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 93ad1eafb97..117ab85ae65 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -25,6 +25,7 @@ OrderedSet, decode_numpy_dict_values, either_dict_or_kwargs, + infix_dims, ensure_us_time_resolution, ) @@ -389,6 +390,11 @@ def compute(self, **kwargs): new = self.copy(deep=False) return new.load(**kwargs) + def __dask_tokenize__(self): + # Use v.data, instead of v._data, in order to cope with the wrappers + # around NetCDF and the like + return type(self), self._dims, self.data, self._attrs + def __dask_graph__(self): if isinstance(self._data, dask_array_type): return self._data.__dask_graph__() @@ -1228,6 +1234,7 @@ def transpose(self, *dims) -> "Variable": """ if len(dims) == 0: dims = self.dims[::-1] + dims = tuple(infix_dims(dims, self.dims)) axes = self.get_axis_num(dims) if len(dims) < 2: # no need to transpose if only one dimension return self.copy(deep=False) @@ -1524,7 +1531,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): along the given dimension. """ if not isinstance(dim, str): - dim, = dim.dims + (dim,) = dim.dims # can't do this lazily: we need to loop through variables at least # twice @@ -1961,6 +1968,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): if not isinstance(self._data, PandasIndexAdapter): self._data = PandasIndexAdapter(self._data) + def __dask_tokenize__(self): + # Don't waste time converting pd.Index to np.ndarray + return (type(self), self._dims, self._data.array, self._attrs) + def load(self): # data is already loaded into memory for IndexVariable return self @@ -1994,7 +2005,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): arrays, if possible.
""" if not isinstance(dim, str): - dim, = dim.dims + (dim,) = dim.dims variables = list(variables) first_var = variables[0] diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index a288f195e32..ca68f617144 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -83,8 +83,8 @@ def _infer_line_data(darray, x, y, hue): ) else: - xdim, = darray[xname].dims - huedim, = darray[huename].dims + (xdim,) = darray[xname].dims + (huedim,) = darray[huename].dims yplt = darray.transpose(xdim, huedim) else: @@ -102,8 +102,8 @@ def _infer_line_data(darray, x, y, hue): ) else: - ydim, = darray[yname].dims - huedim, = darray[huename].dims + (ydim,) = darray[yname].dims + (huedim,) = darray[huename].dims xplt = darray.transpose(ydim, huedim) huelabel = label_from_attrs(darray[huename]) diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index 536b8ab6103..7e382de3b5b 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -2,6 +2,17 @@ * */ +:root { + --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1)); + --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54)); + --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38)); + --xr-border-color: var(--jp-border-color2, #e0e0e0); + --xr-disabled-color: var(--jp-layout-color3, #bdbdbd); + --xr-background-color: var(--jp-layout-color0, white); + --xr-background-color-row-even: var(--jp-layout-color1, white); + --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); +} + .xr-wrap { min-width: 300px; max-width: 700px; @@ -11,7 +22,7 @@ padding-top: 6px; padding-bottom: 6px; margin-bottom: 4px; - border-bottom: solid 1px #ddd; + border-bottom: solid 1px var(--xr-border-color); } .xr-header > div, @@ -28,11 +39,7 @@ } .xr-obj-type { - color: #555; -} - -.xr-array-name { - color: #000; + color: var(--xr-font-color2); } .xr-sections { @@ -50,21 +57,21 @@ } .xr-section-item input + label { - color: #ccc; + color: var(--xr-disabled-color); } .xr-section-item input:enabled + label { cursor: pointer; - color: #555; + color: var(--xr-font-color2); } .xr-section-item input:enabled + label:hover { - color: #000; + color: var(--xr-font-color0); } .xr-section-summary { grid-column: 1; - color: #555; + color: var(--xr-font-color2); font-weight: 500; } @@ -74,7 +81,7 @@ } .xr-section-summary-in:disabled + label { - color: #555; + color: var(--xr-font-color2); } .xr-section-summary-in + label:before { @@ -86,7 +93,7 @@ } .xr-section-summary-in:disabled + label:before { - color: #ccc; + color: var(--xr-disabled-color); } .xr-section-summary-in:checked + label:before { @@ -129,7 +136,7 @@ } .xr-preview { - color: #888; + color: var(--xr-font-color3); } .xr-array-preview, @@ -186,7 +193,7 @@ .xr-var-item > div, .xr-var-item label, .xr-var-item > .xr-var-name span { - background-color: #fcfcfc; + background-color: var(--xr-background-color-row-even); margin-bottom: 0; } @@ -197,7 +204,7 @@ .xr-var-list > li:nth-child(odd) > div, .xr-var-list > li:nth-child(odd) > label, .xr-var-list > li:nth-child(odd) > .xr-var-name span { - background-color: #efefef; + background-color: var(--xr-background-color-row-odd); } .xr-var-name { @@ -211,7 +218,7 @@ .xr-var-dtype { grid-column: 3; text-align: right; - color: #555; + color: var(--xr-font-color2); } .xr-var-preview { @@ -241,7 +248,7 @@ .xr-var-attrs, .xr-var-data { display: none; - background-color: #fff !important; + background-color: var(--xr-background-color) !important; padding-bottom: 5px !important; } @@ -288,7 +295,7 @@ dl.xr-attrs { 
.xr-attrs dt:hover span { display: inline-block; - background: #fff; + background: var(--xr-background-color); padding-right: 10px; } diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 88476e5e730..6592360cdf2 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -78,10 +78,6 @@ def LooseVersion(vstring): requires_scipy_or_netCDF4 = pytest.mark.skipif( not has_scipy_or_netCDF4, reason="requires scipy or netCDF4" ) -has_cftime_or_netCDF4 = has_cftime or has_netCDF4 -requires_cftime_or_netCDF4 = pytest.mark.skipif( - not has_cftime_or_netCDF4, reason="requires cftime or netCDF4" -) try: import_seaborn() has_seaborn = True @@ -158,18 +154,21 @@ def source_ndarray(array): def assert_equal(a, b): + __tracebackhide__ = True xarray.testing.assert_equal(a, b) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) def assert_identical(a, b): + __tracebackhide__ = True xarray.testing.assert_identical(a, b) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) def assert_allclose(a, b, **kwargs): + __tracebackhide__ = True xarray.testing.assert_allclose(a, b, **kwargs) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 0058747db71..5fe5b8c3f59 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,10 +7,8 @@ from . import ( assert_array_equal, assert_equal, - has_cftime, - has_cftime_or_netCDF4, - has_dask, raises_regex, + requires_cftime, requires_dask, ) @@ -199,7 +197,7 @@ def times_3d(times): ) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -217,7 +215,7 @@ def test_field_access(data, field): assert_equal(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftime_strftime_access(data): """ compare cftime formatting against datetime formatting """ date_format = "%Y%m%d%H" @@ -232,8 +230,8 @@ def test_cftime_strftime_access(data): assert_equal(result, expected) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -254,8 +252,8 @@ def test_dask_field_access_1d(data, field): assert_equal(result.compute(), expected) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -286,7 +284,7 @@ def cftime_date_type(calendar): return _all_cftime_date_types()[calendar] -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_seasons(cftime_date_type): dates = np.array([cftime_date_type(2000, month, 15) for month in range(1, 13)]) dates = xr.DataArray(dates) @@ -307,15 +305,3 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) - - -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime or netCDF4 not installed") -def test_dt_accessor_error_netCDF4(cftime_date_type): - da = xr.DataArray( - [cftime_date_type(1, 
1, 1), cftime_date_type(2, 1, 1)], dims=["time"] - ) - if not has_cftime: - with pytest.raises(TypeError): - da.dt.month - else: - da.dt.month diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 142769dbbe7..343e059f53c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1187,5 +1187,5 @@ def test_dayofyear_after_cftime_range(freq): def test_cftime_range_standard_calendar_refers_to_gregorian(): from cftime import DatetimeGregorian - result, = cftime_range("2000", periods=1) + (result,) = cftime_range("2000", periods=1) assert isinstance(result, DatetimeGregorian) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index e49dc72abdd..a8ee3c97042 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -15,7 +15,7 @@ ) from xarray.tests import assert_array_equal, assert_identical -from . import has_cftime, has_cftime_or_netCDF4, raises_regex, requires_cftime +from . import raises_regex, requires_cftime from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, @@ -653,7 +653,7 @@ def test_indexing_in_dataframe_iloc(df, index): assert result.equals(expected) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_concat_cftimeindex(date_type): da1 = xr.DataArray( [1.0, 2.0], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], dims=["time"] @@ -663,11 +663,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - if has_cftime: - assert isinstance(da.indexes["time"], CFTimeIndex) - else: - assert isinstance(da.indexes["time"], pd.Index) - assert not isinstance(da.indexes["time"], CFTimeIndex) + assert isinstance(da.indexes["time"], CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 021d76e2b11..d012fb36c35 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,6 @@ from xarray import DataArray, Dataset, Variable, coding, decode_cf from xarray.coding.times import ( - _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime, @@ -19,15 +18,7 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal -from . import ( - arm_xfail, - assert_array_equal, - has_cftime, - has_cftime_or_netCDF4, - has_dask, - requires_cftime, - requires_cftime_or_netCDF4, -) +from . 
import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask _NON_STANDARD_CALENDARS_SET = { "noleap", @@ -79,10 +70,8 @@ def _all_cftime_date_types(): - try: - import cftime - except ImportError: - import netcdftime as cftime + import cftime + return { "noleap": cftime.DatetimeNoLeap, "365_day": cftime.DatetimeNoLeap, @@ -95,16 +84,14 @@ def _all_cftime_date_types(): } -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) def test_cf_datetime(num_dates, units, calendar): - cftime = _import_cftime() - if cftime.__name__ == "cftime": - expected = cftime.num2date( - num_dates, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(num_dates, units, calendar) + import cftime + + expected = cftime.num2date( + num_dates, units, calendar, only_use_cftime_datetimes=True + ) min_y = np.ravel(np.atleast_1d(expected))[np.nanargmin(num_dates)].year max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)].year if min_y >= 1678 and max_y < 2262: @@ -138,15 +125,12 @@ def test_cf_datetime(num_dates, units, calendar): assert_array_equal(num_dates, np.around(encoded, 1)) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decode_cf_datetime_overflow(): # checks for # https://github.com/pydata/pandas/issues/14068 # https://github.com/pydata/xarray/issues/975 - try: - from cftime import DatetimeGregorian - except ImportError: - from netcdftime import DatetimeGregorian + from cftime import DatetimeGregorian datetime = DatetimeGregorian units = "days since 2000-01-01 00:00:00" @@ -171,7 +155,7 @@ def test_decode_cf_datetime_non_standard_units(): assert_array_equal(actual, expected) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decode_cf_datetime_non_iso_strings(): # datetime strings that are _almost_ ISO compliant but not quite, # but which cftime.num2date can still parse correctly @@ -190,10 +174,10 @@ def test_decode_cf_datetime_non_iso_strings(): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H") @@ -210,21 +194,18 @@ def test_decode_standard_calendar_inside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H") non_standard_time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(non_standard_time, units, calendar=calendar) - + expected = cftime.num2date( + non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True + ) expected_dtype = np.dtype("O") actual = coding.times.decode_cf_datetime( @@ -238,24 +219,19 @@ def 
test_decode_non_standard_calendar_inside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_dates_outside_timestamp_range(calendar): + import cftime from datetime import datetime - cftime = _import_cftime() - units = "days since 0001-01-01" times = [datetime(1, 4, 1, h) for h in range(1, 5)] time = cftime.date2num(times, units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - time, units, calendar=calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(time, units, calendar=calendar) - + expected = cftime.num2date( + time, units, calendar=calendar, only_use_cftime_datetimes=True + ) expected_date_type = type(expected[0]) with warnings.catch_warnings(): @@ -269,7 +245,7 @@ def test_decode_dates_outside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_single_element_inside_timestamp_range(calendar): units = "days since 0001-01-01" @@ -280,7 +256,7 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range(calendar assert actual.dtype == np.dtype("M8[ns]") -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_single_element_inside_timestamp_range(calendar): units = "days since 0001-01-01" @@ -291,10 +267,11 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range(cale assert actual.dtype == np.dtype("O") -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_single_element_outside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" for days in [1, 1470376]: for num_time in [days, [days], [[days]]]: @@ -304,20 +281,16 @@ def test_decode_single_element_outside_timestamp_range(calendar): num_time, units, calendar=calendar ) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - days, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(days, units, calendar) - + expected = cftime.num2date( + days, units, calendar, only_use_cftime_datetimes=True + ) assert isinstance(actual.item(), type(expected)) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_multidim_time_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") @@ -343,10 +316,10 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(calendar) assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times1 = 
pd.date_range("2001-04-01", end="2001-04-05", freq="D") @@ -382,13 +355,12 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calend assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_multidim_time_outside_timestamp_range(calendar): + import cftime from datetime import datetime - cftime = _import_cftime() - units = "days since 0001-01-01" times1 = [datetime(1, 4, day) for day in range(1, 6)] times2 = [datetime(1, 5, day) for day in range(1, 6)] @@ -398,16 +370,8 @@ def test_decode_multidim_time_outside_timestamp_range(calendar): mdim_time[:, 0] = time1 mdim_time[:, 1] = time2 - if cftime.__name__ == "cftime": - expected1 = cftime.num2date( - time1, units, calendar, only_use_cftime_datetimes=True - ) - expected2 = cftime.num2date( - time2, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected1 = cftime.num2date(time1, units, calendar) - expected2 = cftime.num2date(time2, units, calendar) + expected1 = cftime.num2date(time1, units, calendar, only_use_cftime_datetimes=True) + expected2 = cftime.num2date(time2, units, calendar, only_use_cftime_datetimes=True) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") @@ -424,46 +388,38 @@ def test_decode_multidim_time_outside_timestamp_range(calendar): assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", ["360_day", "all_leap", "366_day"]) def test_decode_non_standard_calendar_single_element(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" - try: - dt = cftime.netcdftime.datetime(2001, 2, 29) - except AttributeError: - # Must be using the standalone cftime library - dt = cftime.datetime(2001, 2, 29) + dt = cftime.datetime(2001, 2, 29) num_time = cftime.date2num(dt, units, calendar) actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = np.asarray( - cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) - ) - else: - expected = np.asarray(cftime.num2date(num_time, units, calendar)) + expected = np.asarray( + cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) + ) assert actual.dtype == np.dtype("O") assert expected == actual -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_decode_360_day_calendar(): - cftime = _import_cftime() + import cftime + calendar = "360_day" # ensure leap year doesn't matter for year in [2010, 2011, 2012, 2013, 2014]: units = f"days since {year}-01-01" num_times = np.arange(100) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - num_times, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(num_times, units, calendar) + expected = cftime.num2date( + num_times, units, calendar, only_use_cftime_datetimes=True + ) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -477,7 +433,7 @@ def test_decode_360_day_calendar(): @arm_xfail -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ["num_dates", "units", "expected_list"], [ @@ -499,7 +455,7 @@ def test_cf_datetime_nan(num_dates, units, 
expected_list): assert_array_equal(expected, actual) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decoded_cf_datetime_array_2d(): # regression test for GH1229 variable = Variable( @@ -548,7 +504,7 @@ def test_infer_datetime_units(dates, expected): ] -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "calendar", _NON_STANDARD_CALENDARS + ["gregorian", "proleptic_gregorian"] ) @@ -622,7 +578,7 @@ def test_infer_timedelta_units(deltas, expected): assert expected == coding.times.infer_timedelta_units(deltas) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ["date_args", "expected"], [ @@ -649,7 +605,7 @@ def test_decode_cf(calendar): ds[v].attrs["units"] = "days since 2001-01-01" ds[v].attrs["calendar"] = calendar - if not has_cftime_or_netCDF4 and calendar not in _STANDARD_CALENDARS: + if not has_cftime and calendar not in _STANDARD_CALENDARS: with pytest.raises(ValueError): ds = decode_cf(ds) else: @@ -703,7 +659,7 @@ def test_decode_cf_time_bounds(): _update_bounds_attributes(ds.variables) -@requires_cftime_or_netCDF4 +@requires_cftime def test_encode_time_bounds(): time = pd.date_range("2000-01-16", periods=1) @@ -749,7 +705,7 @@ def calendar(request): @pytest.fixture() def times(calendar): - cftime = _import_cftime() + import cftime return cftime.num2date( np.arange(4), @@ -779,24 +735,24 @@ def times_3d(times): ) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains_cftime_datetimes_1d(data): assert contains_cftime_datetimes(data.time) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask def test_contains_cftime_datetimes_dask_1d(data): assert contains_cftime_datetimes(data.time.chunk()) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains_cftime_datetimes_3d(times_3d): assert contains_cftime_datetimes(times_3d) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask def test_contains_cftime_datetimes_dask_3d(times_3d): assert contains_cftime_datetimes(times_3d.chunk()) @@ -806,13 +762,13 @@ def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data) -@pytest.mark.skipif(not has_dask, reason="dask not installed") +@requires_dask @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data.chunk()) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("shape", [(24,), (8, 3), (2, 4, 3)]) def test_encode_cf_datetime_overflow(shape): # Test for fix to GH 2272 @@ -837,7 +793,7 @@ def test_encode_cf_datetime_pandas_min(): assert calendar == expected_calendar -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_time_units_with_timezone_roundtrip(calendar): # Regression test for GH 2649 expected_units = "days since 2000-01-01T00:00:00-05:00" diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 383427b479b..1f2634cc9b0 100644 --- a/xarray/tests/test_computation.py +++ 
b/xarray/tests/test_computation.py @@ -998,6 +998,23 @@ def test_dot(use_dask): assert actual.dims == ("b",) assert (actual.data == np.zeros(actual.shape)).all() + # Ellipsis (...) sums over all dimensions + actual = xr.dot(da_a, da_b, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij,ijk->", a, b)).all() + + actual = xr.dot(da_a, da_b, da_c, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij,ijk,kl-> ", a, b, c)).all() + + actual = xr.dot(da_a, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij-> ", a)).all() + + actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims=...) + assert actual.dims == () + assert (actual.data == np.zeros(actual.shape)).all() + # Invalid cases if not use_dask: with pytest.raises(TypeError): diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 42b2a679347..09002e252b4 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -21,7 +21,7 @@ from . import ( assert_array_equal, raises_regex, - requires_cftime_or_netCDF4, + requires_cftime, requires_dask, requires_netCDF4, ) @@ -81,7 +81,7 @@ def test_decode_cf_with_conflicting_fill_missing_value(): assert_identical(actual, expected) -@requires_cftime_or_netCDF4 +@requires_cftime class TestEncodeCFVariable: def test_incompatible_attributes(self): invalid_vars = [ @@ -144,7 +144,7 @@ def test_string_object_warning(self): assert_identical(original, encoded) -@requires_cftime_or_netCDF4 +@requires_cftime class TestDecodeCF: def test_dataset(self): original = Dataset( @@ -226,7 +226,7 @@ def test_invalid_time_units_raises_eagerly(self): with raises_regex(ValueError, "unable to decode time"): decode_cf(ds) - @requires_cftime_or_netCDF4 + @requires_cftime def test_dataset_repr_with_netcdf4_datetimes(self): # regression test for #347 attrs = {"units": "days since 0001-01-01", "calendar": "noleap"} @@ -239,7 +239,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self): ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)})) assert "(time) datetime64[ns]" in repr(ds) - @requires_cftime_or_netCDF4 + @requires_cftime def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 50517ae3c9c..c4323d1d317 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1,5 +1,6 @@ import operator import pickle +import sys from contextlib import suppress from distutils.version import LooseVersion from textwrap import dedent @@ -21,12 +22,16 @@ assert_frame_equal, assert_identical, raises_regex, + requires_scipy_or_netCDF4, ) +from .test_backends import create_tmp_file dask = pytest.importorskip("dask") da = pytest.importorskip("dask.array") dd = pytest.importorskip("dask.dataframe") +ON_WINDOWS = sys.platform == "win32" + class CountingScheduler: """ Simple dask scheduler counting the number of computes. 
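The test_dask.py hunk below adds tokenization tests for the new __dask_tokenize__ methods introduced in xarray/core/variable.py above. A minimal sketch of the property those tests assert, assuming this patch is applied and dask is installed:

import dask.base
import xarray as xr

a = xr.DataArray([1, 2, 3], dims="x")
# Equal objects produce equal tokens; changing the underlying data changes the token.
assert dask.base.tokenize(a) == dask.base.tokenize(a.copy(deep=True))
assert dask.base.tokenize(a) != dask.base.tokenize(a + 1)
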
@@ -1135,3 +1140,92 @@ def test_make_meta(map_ds): for variable in map_ds.data_vars: assert variable in meta.data_vars assert meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim + + +@pytest.mark.parametrize( + "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] +) +@pytest.mark.parametrize( + "transform", + [ + lambda x: x.reset_coords(), + lambda x: x.reset_coords(drop=True), + lambda x: x.isel(x=1), + lambda x: x.attrs.update(new_attrs=1), + lambda x: x.assign_coords(cxy=1), + lambda x: x.rename({"x": "xnew"}), + lambda x: x.rename({"cxy": "cxynew"}), + ], +) +def test_token_changes_on_transform(obj, transform): + with raise_if_dask_computes(): + assert dask.base.tokenize(obj) != dask.base.tokenize(transform(obj)) + + +@pytest.mark.parametrize( + "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] +) +def test_token_changes_when_data_changes(obj): + with raise_if_dask_computes(): + t1 = dask.base.tokenize(obj) + + # Change data_var + if isinstance(obj, DataArray): + obj *= 2 + else: + obj["a"] *= 2 + with raise_if_dask_computes(): + t2 = dask.base.tokenize(obj) + assert t2 != t1 + + # Change non-index coord + obj.coords["ndcoord"] *= 2 + with raise_if_dask_computes(): + t3 = dask.base.tokenize(obj) + assert t3 != t2 + + # Change IndexVariable + obj.coords["x"] *= 2 + with raise_if_dask_computes(): + t4 = dask.base.tokenize(obj) + assert t4 != t3 + + +@pytest.mark.parametrize("obj", [make_da().compute(), make_ds().compute()]) +def test_token_changes_when_buffer_changes(obj): + with raise_if_dask_computes(): + t1 = dask.base.tokenize(obj) + + if isinstance(obj, DataArray): + obj[0, 0] = 123 + else: + obj["a"][0, 0] = 123 + with raise_if_dask_computes(): + t2 = dask.base.tokenize(obj) + assert t2 != t1 + + obj.coords["ndcoord"][0] = 123 + with raise_if_dask_computes(): + t3 = dask.base.tokenize(obj) + assert t3 != t2 + + +@pytest.mark.parametrize( + "transform", + [lambda x: x, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], +) +@pytest.mark.parametrize("obj", [make_da(), make_ds(), make_ds().variables["a"]]) +def test_token_identical(obj, transform): + with raise_if_dask_computes(): + assert dask.base.tokenize(obj) == dask.base.tokenize(transform(obj)) + assert dask.base.tokenize(obj.compute()) == dask.base.tokenize( + transform(obj.compute()) + ) + + +@requires_scipy_or_netCDF4 +def test_normalize_token_with_backend(map_ds): + with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: + map_ds.to_netcdf(tmp_file) + read = xr.open_dataset(tmp_file) + assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b13527bc098..5114d13b0dc 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2068,6 +2068,10 @@ def test_transpose(self): ) assert_equal(expected, actual) + # same as previous but with ellipsis + actual = da.transpose("z", ..., "x", transpose_coords=True) + assert_equal(expected, actual) + with pytest.raises(ValueError): da.transpose("x", "y") @@ -2560,15 +2564,6 @@ def change_metadata(x): expected = change_metadata(expected) assert_equal(expected, actual) - def test_groupby_reduce_dimension_error(self): - array = self.make_groupby_example_array() - grouped = array.groupby("y") - with raises_regex(ValueError, "cannot reduce over dimension 'y'"): - grouped.mean() - - grouped = array.groupby("y", squeeze=False) - assert_identical(array, grouped.mean()) - def test_groupby_math(self): 
array = self.make_groupby_example_array() for squeeze in [True, False]: @@ -3130,11 +3125,11 @@ def test_align_copy(self): # Trivial align - 1 element x = DataArray([1, 2, 3], coords=[("a", [1, 2, 3])]) - x2, = align(x, copy=False) + (x2,) = align(x, copy=False) assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) - x2, = align(x, copy=True) + (x2,) = align(x, copy=True) assert_identical(x, x2) assert source_ndarray(x2.data) is not source_ndarray(x.data) @@ -3219,7 +3214,7 @@ def test_align_indexes(self): assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) - x2, = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]}) + (x2,) = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]}) expected_x2 = DataArray([3, np.nan, 2, 1], coords=[("a", [-2, 7, 10, -1])]) assert_identical(expected_x2, x2) @@ -3298,7 +3293,7 @@ def test_broadcast_arrays_nocopy(self): assert source_ndarray(x2.data) is source_ndarray(x.data) # single-element broadcast (trivial case) - x2, = broadcast(x) + (x2,) = broadcast(x) assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) @@ -3930,6 +3925,16 @@ def test_dot(self): expected = DataArray(expected_vals, coords=[x, j], dims=["x", "j"]) assert_equal(expected, actual) + # Ellipsis: all dims are shared + actual = da.dot(da, dims=...) + expected = da.dot(da) + assert_equal(expected, actual) + + # Ellipsis: not all dims are shared + actual = da.dot(dm, dims=...) + expected = da.dot(dm, dims=("j", "x", "y", "z")) + assert_equal(expected, actual) + with pytest.raises(NotImplementedError): da.dot(dm.to_dataset(name="dm")) with pytest.raises(TypeError): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b3ffdf68e3f..eab6040e17e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1945,7 +1945,7 @@ def test_align_nocopy(self): def test_align_indexes(self): x = Dataset({"foo": DataArray([1, 2, 3], dims="x", coords=[("x", [1, 2, 3])])}) - x2, = align(x, indexes={"x": [2, 3, 1]}) + (x2,) = align(x, indexes={"x": [2, 3, 1]}) expected_x2 = Dataset( {"foo": DataArray([2, 3, 1], dims="x", coords={"x": [2, 3, 1]})} ) @@ -1973,7 +1973,7 @@ def test_broadcast(self): }, {"c": ("x", [4])}, ) - actual, = broadcast(ds) + (actual,) = broadcast(ds) assert_identical(expected, actual) ds_x = Dataset({"foo": ("x", [1])}) @@ -1995,7 +1995,7 @@ def test_broadcast_nocopy(self): x = Dataset({"foo": (("x", "y"), [[1, 1]])}) y = Dataset({"bar": ("y", [2, 3])}) - actual_x, = broadcast(x) + (actual_x,) = broadcast(x) assert_identical(x, actual_x) assert source_ndarray(actual_x["foo"].data) is source_ndarray(x["foo"].data) @@ -2117,25 +2117,31 @@ def test_drop_variables(self): def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) - actual = data.drop(["a"], "x") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a"], "x") expected = data.isel(x=[1]) assert_identical(expected, actual) - actual = data.drop(["a", "b"], "x") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a", "b"], "x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) with pytest.raises(KeyError): # not contained in axis - data.drop(["c"], dim="x") + with pytest.warns(DeprecationWarning): + data.drop(["c"], dim="x") - actual = data.drop(["c"], dim="x", errors="ignore") + with pytest.warns(DeprecationWarning): + actual = data.drop(["c"], dim="x", errors="ignore") assert_identical(data, actual) with 
pytest.raises(ValueError): - data.drop(["c"], dim="x", errors="wrong_value") + with pytest.warns(DeprecationWarning): + data.drop(["c"], dim="x", errors="wrong_value") - actual = data.drop(["a", "b", "c"], "x", errors="ignore") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a", "b", "c"], "x", errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) @@ -4675,6 +4681,10 @@ def test_dataset_transpose(self): ) assert_identical(expected, actual) + actual = ds.transpose(...) + expected = ds + assert_identical(expected, actual) + actual = ds.transpose("x", "y") expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) @@ -4690,13 +4700,32 @@ def test_dataset_transpose(self): expected_dims = tuple(d for d in new_order if d in ds[k].dims) assert actual[k].dims == expected_dims - with raises_regex(ValueError, "arguments to transpose"): + # same as above but with ellipsis + new_order = ("dim2", "dim3", "dim1", "time") + actual = ds.transpose("dim2", "dim3", ...) + for k in ds.variables: + expected_dims = tuple(d for d in new_order if d in ds[k].dims) + assert actual[k].dims == expected_dims + + with raises_regex(ValueError, "permuted"): ds.transpose("dim1", "dim2", "dim3") - with raises_regex(ValueError, "arguments to transpose"): + with raises_regex(ValueError, "permuted"): ds.transpose("dim1", "dim2", "dim3", "time", "extra_dim") assert "T" not in dir(ds) + def test_dataset_ellipsis_transpose_different_ordered_vars(self): + # https://github.com/pydata/xarray/issues/1081#issuecomment-544350457 + ds = Dataset( + dict( + a=(("w", "x", "y", "z"), np.ones((2, 3, 4, 5))), + b=(("x", "w", "y", "z"), np.zeros((3, 2, 4, 5))), + ) + ) + result = ds.transpose(..., "z", "y") + assert list(result["a"].dims) == list("wxzy") + assert list(result["b"].dims) == list("xwzy") + def test_dataset_retains_period_index_on_transpose(self): ds = create_test_data() diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index eb073a14aae..9df2f167cf2 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -440,7 +440,9 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): **{aggdim: getattr(da, "arg" + func)(dim=aggdim, skipna=skipna).compute()} ) expected = getattr(da, func)(dim=aggdim, skipna=skipna) - assert_allclose(actual.drop(actual.coords), expected.drop(expected.coords)) + assert_allclose( + actual.drop(list(actual.coords)), expected.drop(list(expected.coords)) + ) def test_argmin_max_error(): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index a6de41beb66..e2216547ac8 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -5,7 +5,23 @@ import xarray as xr from xarray.core.groupby import _consolidate_slices -from . import assert_identical, raises_regex +from . 
import assert_allclose, assert_equal, assert_identical, raises_regex + + +@pytest.fixture +def dataset(): + ds = xr.Dataset( + {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, + {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, + ) + ds["boo"] = (("z", "y"), [["f", "g", "h", "j"]] * 2) + + return ds + + +@pytest.fixture +def array(dataset): + return dataset["foo"] def test_consolidate_slices(): @@ -21,33 +37,25 @@ def test_consolidate_slices(): _consolidate_slices([slice(3), 4]) -def test_groupby_dims_property(): - ds = xr.Dataset( - {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, - {"x": ["a", "bcd", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, - ) +def test_groupby_dims_property(dataset): + assert dataset.groupby("x").dims == dataset.isel(x=1).dims + assert dataset.groupby("y").dims == dataset.isel(y=1).dims - assert ds.groupby("x").dims == ds.isel(x=1).dims - assert ds.groupby("y").dims == ds.isel(y=1).dims - - stacked = ds.stack({"xy": ("x", "y")}) + stacked = dataset.stack({"xy": ("x", "y")}) assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims -def test_multi_index_groupby_apply(): +def test_multi_index_groupby_apply(dataset): # regression test for GH873 - ds = xr.Dataset( - {"foo": (("x", "y"), np.random.randn(3, 4))}, - {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]}, - ) - doubled = 2 * ds - group_doubled = ( + ds = dataset.isel(z=1, drop=True)[["foo"]] + expected = 2 * ds + actual = ( ds.stack(space=["x", "y"]) .groupby("space") .apply(lambda x: 2 * x) .unstack("space") ) - assert doubled.equals(group_doubled) + assert_equal(expected, actual) def test_multi_index_groupby_sum(): @@ -58,7 +66,7 @@ def test_multi_index_groupby_sum(): ) expected = ds.sum("z") actual = ds.stack(space=["x", "y"]).groupby("space").sum("z").unstack("space") - assert expected.equals(actual) + assert_equal(expected, actual) def test_groupby_da_datetime(): @@ -78,7 +86,7 @@ def test_groupby_da_datetime(): expected = xr.DataArray( [3, 7], coords=dict(reference_date=reference_dates), dims="reference_date" ) - assert actual.equals(expected) + assert_equal(expected, actual) def test_groupby_duplicate_coordinate_labels(): @@ -86,7 +94,7 @@ def test_groupby_duplicate_coordinate_labels(): array = xr.DataArray([1, 2, 3], [("x", [1, 1, 2])]) expected = xr.DataArray([3, 3], [("x", [1, 2])]) actual = array.groupby("x").sum() - assert expected.equals(actual) + assert_equal(expected, actual) def test_groupby_input_mutation(): @@ -255,6 +263,72 @@ def test_groupby_repr_datetime(obj): assert actual == expected +def test_groupby_drops_nans(): + # GH2383 + # nan in 2D data variable (requires stacking) + ds = xr.Dataset( + { + "variable": (("lat", "lon", "time"), np.arange(60.0).reshape((4, 3, 5))), + "id": (("lat", "lon"), np.arange(12.0).reshape((4, 3))), + }, + coords={"lat": np.arange(4), "lon": np.arange(3), "time": np.arange(5)}, + ) + + ds["id"].values[0, 0] = np.nan + ds["id"].values[3, 0] = np.nan + ds["id"].values[-1, -1] = np.nan + + grouped = ds.groupby(ds.id) + + # non reduction operation + expected = ds.copy() + expected.variable.values[0, 0, :] = np.nan + expected.variable.values[-1, -1, :] = np.nan + expected.variable.values[3, 0, :] = np.nan + actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims) + assert_identical(actual, expected) + + # reduction along grouped dimension + actual = grouped.mean() + stacked = ds.stack({"xy": ["lat", "lon"]}) + expected = ( + stacked.variable.where(stacked.id.notnull()).rename({"xy": "id"}).to_dataset() + ) + expected["id"] = stacked.id.values + 
assert_identical(actual, expected.dropna("id").transpose(*actual.dims)) + + # reduction operation along a different dimension + actual = grouped.mean("time") + expected = ds.mean("time").where(ds.id.notnull()) + assert_identical(actual, expected) + + # NaN in non-dimensional coordinate + array = xr.DataArray([1, 2, 3], [("x", [1, 2, 3])]) + array["x1"] = ("x", [1, 1, np.nan]) + expected = xr.DataArray(3, [("x1", [1])]) + actual = array.groupby("x1").sum() + assert_equal(expected, actual) + + # NaT in non-dimensional coordinate + array["t"] = ( + "x", + [ + np.datetime64("2001-01-01"), + np.datetime64("2001-01-01"), + np.datetime64("NaT"), + ], + ) + expected = xr.DataArray(3, [("t", [np.datetime64("2001-01-01")])]) + actual = array.groupby("t").sum() + assert_equal(expected, actual) + + # test for repeated coordinate labels + array = xr.DataArray([0, 1, 2, 4, 3, 4], [("x", [np.nan, 1, 1, np.nan, 2, np.nan])]) + expected = xr.DataArray([3, 3], [("x", [1, 2])]) + actual = array.groupby("x").sum() + assert_equal(expected, actual) + + def test_groupby_grouping_errors(): dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) with raises_regex(ValueError, "None of the data falls within bins with edges"): @@ -276,6 +350,24 @@ def test_groupby_grouping_errors(): dataset.to_array().groupby(dataset.foo * np.nan) +def test_groupby_reduce_dimension_error(array): + grouped = array.groupby("y") + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean() + + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean("huh") + + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean(("x", "y", "asd")) + + grouped = array.groupby("y", squeeze=False) + assert_identical(array, grouped.mean()) + + assert_identical(array.mean("x"), grouped.reduce(np.mean, "x")) + assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) + + def test_groupby_bins_timeseries(): ds = xr.Dataset() ds["time"] = xr.DataArray( diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 73c4b9b8c74..8e2d4b8e064 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -11,7 +11,7 @@ from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.core.pycompat import sparse_array_type -from . import assert_equal, assert_identical +from . import assert_equal, assert_identical, requires_dask param = pytest.param xfail = pytest.mark.xfail @@ -849,3 +849,23 @@ def test_chunk(): dsc = ds.chunk(2) assert dsc.chunks == {"dim_0": (2, 2)} assert_identical(dsc, ds) + + +@requires_dask +def test_dask_token(): + import dask + + s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + a = DataArray(s) + t1 = dask.base.tokenize(a) + t2 = dask.base.tokenize(a) + t3 = dask.base.tokenize(a + 1) + assert t1 == t2 + assert t3 != t2 + assert isinstance(a.data, sparse.COO) + + ac = a.chunk(2) + t4 = dask.base.tokenize(ac) + t5 = dask.base.tokenize(ac + 1) + assert t4 != t5 + assert isinstance(ac.data._meta, sparse.COO) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index c36e8a1775d..af87b94393d 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -9,7 +9,7 @@ from xarray.core import duck_array_ops, utils from xarray.core.utils import either_dict_or_kwargs -from . import assert_array_equal, has_cftime, has_cftime_or_netCDF4, requires_dask +from . 
import assert_array_equal, requires_cftime, requires_dask from .test_coding_times import _all_cftime_date_types @@ -39,17 +39,12 @@ def test_safe_cast_to_index(): assert expected.dtype == actual.dtype -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_safe_cast_to_index_cftimeindex(): date_types = _all_cftime_date_types() for date_type in date_types.values(): dates = [date_type(1, 1, day) for day in range(1, 20)] - - if has_cftime: - expected = CFTimeIndex(dates) - else: - expected = pd.Index(dates) - + expected = CFTimeIndex(dates) actual = utils.safe_cast_to_index(np.array(dates)) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype @@ -57,7 +52,7 @@ def test_safe_cast_to_index_cftimeindex(): # Test that datetime.datetime objects are never used in a CFTimeIndex -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_safe_cast_to_index_datetime_datetime(): dates = [datetime(1, 1, day) for day in range(1, 20)] @@ -275,3 +270,27 @@ def test_either_dict_or_kwargs(): with pytest.raises(ValueError, match=r"foo"): result = either_dict_or_kwargs(dict(a=1), dict(a=1), "foo") + + +@pytest.mark.parametrize( + ["supplied", "all_", "expected"], + [ + (list("abc"), list("abc"), list("abc")), + (["a", ..., "c"], list("abc"), list("abc")), + (["a", ...], list("abc"), list("abc")), + (["c", ...], list("abc"), list("cab")), + ([..., "b"], list("abc"), list("acb")), + ([...], list("abc"), list("abc")), + ], +) +def test_infix_dims(supplied, all_, expected): + result = list(utils.infix_dims(supplied, all_)) + assert result == expected + + +@pytest.mark.parametrize( + ["supplied", "all_"], [([..., ...], list("abc")), ([...], list("aac"))] +) +def test_infix_dims_errors(supplied, all_): + with pytest.raises(ValueError): + list(utils.infix_dims(supplied, all_)) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 78723eda013..528027ed149 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1280,6 +1280,9 @@ def test_transpose(self): w2 = Variable(["d", "b", "c", "a"], np.einsum("abcd->dbca", x)) assert w2.shape == (5, 3, 4, 2) assert_identical(w2, w.transpose("d", "b", "c", "a")) + assert_identical(w2, w.transpose("d", ..., "a")) + assert_identical(w2, w.transpose("d", "b", "c", ...)) + assert_identical(w2, w.transpose(..., "b", "c", "a")) assert_identical(w, w2.transpose("a", "b", "c", "d")) w3 = Variable(["b", "c", "d", "a"], np.einsum("abcd->bcda", x)) assert_identical(w, w3.transpose("a", "b", "c", "d"))
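
Taken together, the infix_dims helper and the transpose/dot changes above give an ellipsis (...) the meaning "all remaining dimensions" throughout. A short usage sketch, assuming this patch is applied; the dataset here is illustrative:

import numpy as np
import xarray as xr

ds = xr.Dataset({"foo": (("x", "y", "z"), np.ones((2, 3, 4)))})
ds.transpose(..., "x")        # equivalent to ds.transpose("y", "z", "x")
ds.foo.dot(ds.foo, dims=...)  # contracts over every dimension, returning a scalar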