diff --git a/README.rst b/README.rst index 53f51392a1a..5ee7234f221 100644 --- a/README.rst +++ b/README.rst @@ -138,4 +138,7 @@ under a "3-clause BSD" license: xarray also bundles portions of CPython, which is available under the "Python Software Foundation License" in xarray/core/pycompat.py. +xarray uses icons from the icomoon package (free version), which is +available under the "CC BY 4.0" license. + The full text of these licenses are included in the licenses directory. diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index c99ae39e5d9..3f10a158f91 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -13,7 +13,7 @@ dependencies: - cartopy=0.17 - cdms2=3.1 - cfgrib=0.9 - - cftime=1.0.3 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime=1.0 - coveralls - dask=1.2 - distributed=1.27 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index 6e27cea2ffe..f9847ef6da5 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cdms2 - cfgrib - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 7027fc11ab7..111cd96c30c 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -9,7 +9,7 @@ dependencies: - cartopy # - cdms2 # Not available on Windows # - cfgrib # Causes Python interpreter crash on Windows - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index a4c974c0176..d816019dd65 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cdms2 - cfgrib - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/doc/data-structures.rst b/doc/data-structures.rst index f7b34036a03..d5567f4863e 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -411,7 +411,7 @@ Any variables using that dimension are dropped: As an alternate to dictionary-like modifications, you can use :py:meth:`~xarray.Dataset.assign` and :py:meth:`~xarray.Dataset.assign_coords`. -These methods return a new dataset with additional (or replaced) or values: +These methods return a new dataset with additional (or replaced) values: .. ipython:: python @@ -420,7 +420,7 @@ These methods return a new dataset with additional (or replaced) or values: There is also the :py:meth:`~xarray.Dataset.pipe` method that allows you to use a method call with an external function (e.g., ``ds.pipe(func)``) instead of simply calling it (e.g., ``func(ds)``). This allows you to write pipelines for -transforming you data (using "method chaining") instead of writing hard to +transforming your data (using "method chaining") instead of writing hard to follow nested function calls: .. ipython:: python diff --git a/doc/examples/monthly-means.rst b/doc/examples/monthly-means.rst index 7cc47eb2847..7d620f1bca3 100644 --- a/doc/examples/monthly-means.rst +++ b/doc/examples/monthly-means.rst @@ -83,7 +83,7 @@ the ``calendar.month_range`` function. 
for i, (month, year) in enumerate(zip(time.month, time.year)): month_length[i] = cal_days[month] - if leap_year(year, calendar=calendar): + if leap_year(year, calendar=calendar) and month == 2: month_length[i] += 1 return month_length diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 51202f9be41..455a24f9216 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -18,12 +18,14 @@ Reordering dimensions --------------------- To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables -on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`: +on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An +ellipsis (`...`) can be used to represent all other dimensions: .. ipython:: python ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) ds.transpose('y', 'z', 'x') + ds.transpose(..., 'x') # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions diff --git a/doc/terminology.rst b/doc/terminology.rst index 4ee56190d5f..d1265e4da9d 100644 --- a/doc/terminology.rst +++ b/doc/terminology.rst @@ -15,7 +15,7 @@ Terminology ---- -**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. +**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. .. note:: @@ -39,4 +39,4 @@ Terminology ---- -**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` \ No newline at end of file +**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1c6d72e599d..62d6006838b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,60 +21,96 @@ v0.14.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ -- Minimum cftime version is now 1.0.3. By `Deepak Cherian `_. +- Broken compatibility with cftime < 1.0.3. + By `Deepak Cherian `_. + + ..
note:: + + cftime version 1.0.4 is broken + (`cftime/126 `_); + please use version 1.0.4.2 instead. + +- All leftover support for dates from non-standard calendars through netcdftime, the + module included in versions of netCDF4 prior to 1.4 that eventually became the + cftime package, has been removed in favor of relying solely on the standalone + cftime package (:pull:`3450`). + By `Spencer Clark `_. New Features ~~~~~~~~~~~~ - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. +- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) + to represent all 'other' dimensions. For example, to move one dimension to the front, + use `.transpose('x', ...)`. (:pull:`3421`) + By `Maximilian Roos `_ - Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use `...` directly. As before, you can use this to instruct a `groupby` operation to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest - using `...`. + using `...`. (:pull:`3418`) By `Maximilian Roos `_ -- Added integration tests against `pint `_. - (:pull:`3238`) by `Justus Magin `_. - - .. note:: - - At the moment of writing, these tests *as well as the ability to use pint in general* - require `a highly experimental version of pint - `_ (install with - ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. - Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. - +- :py:func:`~xarray.dot`, and :py:func:`~xarray.DataArray.dot` now support the + `dims=...` option to sum over the union of dimensions of all input arrays + (:issue:`3423`) by `Mathias Hauser `_. - Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve representation of objects in jupyter. By default this feature is turned off for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. (:pull:`3425`) by `Benoit Bovy `_ and `Julia Signell `_. +- Implement `dask deterministic hashing + `_ + for xarray objects. Note that xarray objects with a dask.array backend already used + deterministic hashing in previous releases; this change implements it when whole + xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is + invoked. (:issue:`3378`, :pull:`3446`) + By `Deepak Cherian `_ and + `Guido Imperiale `_. Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - -- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. +- Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). + By `Deepak Cherian `_. +- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. - +- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and + :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. + (:issue:`3402`). By `Deepak Cherian `_ Documentation ~~~~~~~~~~~~~ - +- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) by `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` and explicitly state that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin `_. 
- Update the terminology page to address multidimensional coordinates. (:pull:`3410`) By `Jon Thielen `_. +- Fix the documentation of :py:meth:`Dataset.integrate` and + :py:meth:`DataArray.integrate` and add an example to + :py:meth:`Dataset.integrate`. (:pull:`3469`) + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ +- Added integration tests against `pint `_. + (:pull:`3238`) by `Justus Magin `_. + + .. note:: + + At the moment of writing, these tests *as well as the ability to use pint in general* + require `a highly experimental version of pint + `_ (install with + ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. + Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. + - Use Python 3.6 idioms throughout the codebase. (:pull:3419) By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) diff --git a/licenses/ICOMOON_LICENSE b/licenses/ICOMOON_LICENSE new file mode 100644 index 00000000000..4ea99c213c5 --- /dev/null +++ b/licenses/ICOMOON_LICENSE @@ -0,0 +1,395 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. 
Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. 
Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. 
Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. 
No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/properties/conftest.py b/properties/conftest.py new file mode 100644 index 00000000000..0a66d92ebc6 --- /dev/null +++ b/properties/conftest.py @@ -0,0 +1,8 @@ +try: + from hypothesis import settings +except ImportError: + pass +else: + # Run for a while - arrays are a bigger search space than usual + settings.register_profile("ci", deadline=None, print_blob=True) + settings.load_profile("ci") diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index 011e7a922d1..221083e16a1 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -10,15 +10,10 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given, settings +from hypothesis import given import xarray as xr -# Run for a while - arrays are a bigger search space than usual -settings.register_profile("ci", deadline=None) -settings.load_profile("ci") - - an_array = npst.arrays( dtype=st.one_of( npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py new file mode 100644 index 00000000000..a8005d319d6 --- /dev/null +++ b/properties/test_pandas_roundtrip.py @@ -0,0 +1,97 @@ +""" +Property-based tests for roundtripping between xarray and pandas objects. 
+""" +import pytest + +pytest.importorskip("hypothesis") + +from functools import partial +import hypothesis.extra.numpy as npst +import hypothesis.extra.pandas as pdst +import hypothesis.strategies as st +from hypothesis import given + +import numpy as np +import pandas as pd +import xarray as xr + +numeric_dtypes = st.one_of( + npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() +) + +numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt)) + +an_array = npst.arrays( + dtype=numeric_dtypes, + shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas +) + + +@st.composite +def datasets_1d_vars(draw): + """Generate datasets with only 1D variables + + Suitable for converting to pandas dataframes. + """ + # Generate an index for the dataset + idx = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100)) + + # Generate 1-3 variables, 1D with the same length as the index + vars_strategy = st.dictionaries( + keys=st.text(), + values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)).map( + partial(xr.Variable, ("rows",)) + ), + min_size=1, + max_size=3, + ) + return xr.Dataset(draw(vars_strategy), coords={"rows": idx}) + + +@given(st.data(), an_array) +def test_roundtrip_dataarray(data, arr): + names = data.draw( + st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map( + tuple + ) + ) + coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)} + original = xr.DataArray(arr, dims=names, coords=coords) + roundtripped = xr.DataArray(original.to_pandas()) + xr.testing.assert_identical(original, roundtripped) + + +@given(datasets_1d_vars()) +def test_roundtrip_dataset(dataset): + df = dataset.to_dataframe() + assert isinstance(df, pd.DataFrame) + roundtripped = xr.Dataset(df) + xr.testing.assert_identical(dataset, roundtripped) + + +@given(numeric_series, st.text()) +def test_roundtrip_pandas_series(ser, ix_name): + # Need to name the index, otherwise Xarray calls it 'dim_0'. + ser.index.name = ix_name + arr = xr.DataArray(ser) + roundtripped = arr.to_pandas() + pd.testing.assert_series_equal(ser, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) + + +# Dataframes with columns of all the same dtype - for roundtrip to DataArray +numeric_homogeneous_dataframe = numeric_dtypes.flatmap( + lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)) +) + + +@pytest.mark.xfail +@given(numeric_homogeneous_dataframe) +def test_roundtrip_pandas_dataframe(df): + # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. + df.index.name = "rows" + df.columns.name = "cols" + arr = xr.DataArray(df) + roundtripped = arr.to_pandas() + pd.testing.assert_frame_equal(df, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) diff --git a/setup.cfg b/setup.cfg index eee8b2477b2..fec2ca6bbe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -117,4 +117,7 @@ tag_prefix = v parentdir_prefix = xarray- [aliases] -test = pytest \ No newline at end of file +test = pytest + +[pytest-watch] +nobeep = True \ No newline at end of file diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 199516116b0..d23594fc675 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -677,7 +677,7 @@ def open_dataarray( "then select the variable you want." 
) else: - data_array, = dataset.data_vars.values() + (data_array,) = dataset.data_vars.values() data_array._file_obj = dataset._file_obj diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 0174088064b..965ddd8f043 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -39,34 +39,6 @@ ) -def _import_cftime(): - """ - helper function handle the transition to netcdftime/cftime - as a stand-alone package - """ - try: - import cftime - except ImportError: - # in netCDF4 the num2date/date2num function are top-level api - try: - import netCDF4 as cftime - except ImportError: - raise ImportError("Failed to import cftime") - return cftime - - -def _require_standalone_cftime(): - """Raises an ImportError if the standalone cftime is not found""" - try: - import cftime # noqa: F401 - except ImportError: - raise ImportError( - "Decoding times with non-standard calendars " - "or outside the pandas.Timestamp-valid range " - "requires the standalone cftime package." - ) - - def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith("s"): @@ -119,16 +91,11 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): def _decode_datetime_with_cftime(num_dates, units, calendar): - cftime = _import_cftime() + import cftime - if cftime.__name__ == "cftime": - return np.asarray( - cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) - ) - else: - # Must be using num2date from an old version of netCDF4 which - # does not have the only_use_cftime_datetimes option. - return np.asarray(cftime.num2date(num_dates, units, calendar)) + return np.asarray( + cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) + ) def _decode_datetime_with_pandas(flat_num_dates, units, calendar): @@ -354,7 +321,7 @@ def _encode_datetime_with_cftime(dates, units, calendar): This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. 
""" - cftime = _import_cftime() + import cftime if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 1a33cb955c3..41ff5a3b32d 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -252,7 +252,7 @@ def align( if not indexes and len(objects) == 1: # fast path for the trivial case - obj, = objects + (obj,) = objects return (obj.copy(deep=copy),) all_indexes = defaultdict(list) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 19c327ec597..3308dcef285 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -954,7 +954,7 @@ def _auto_concat( "supply the ``concat_dim`` argument " "explicitly" ) - dim, = concat_dims + (dim,) = concat_dims return concat( datasets, dim=dim, diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1393d76f283..bb5ab07d8dd 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -145,7 +145,7 @@ def result_name(objects: list) -> Any: names = {getattr(obj, "name", _DEFAULT_NAME) for obj in objects} names.discard(_DEFAULT_NAME) if len(names) == 1: - name, = names + (name,) = names else: name = None return name @@ -187,7 +187,7 @@ def build_output_coords( if len(coords_list) == 1 and not exclude_dims: # we can skip the expensive merge - unpacked_coords, = coords_list + (unpacked_coords,) = coords_list merged_vars = dict(unpacked_coords.variables) else: # TODO: save these merged indexes, instead of re-computing them later @@ -237,7 +237,7 @@ def apply_dataarray_vfunc( for variable, coords in zip(result_var, result_coords) ) else: - coords, = result_coords + (coords,) = result_coords out = DataArray(result_var, coords, name=name, fastpath=True) return out @@ -384,7 +384,7 @@ def apply_dataset_vfunc( if signature.num_outputs > 1: out = tuple(_fast_dataset(*args) for args in zip(result_vars, list_of_coords)) else: - coord_vars, = list_of_coords + (coord_vars,) = list_of_coords out = _fast_dataset(result_vars, coord_vars) if keep_attrs and isinstance(first_obj, Dataset): @@ -884,7 +884,7 @@ def apply_ufunc( Plain scalars, numpy arrays and a mix of these with xarray objects is also supported: - >>> magnitude(4, 5) + >>> magnitude(3, 4) 5.0 >>> magnitude(3, np.array([0, 4])) array([3., 5.]) @@ -1055,9 +1055,9 @@ def dot(*arrays, dims=None, **kwargs): ---------- arrays: DataArray (or Variable) objects Arrays to compute. - dims: str or tuple of strings, optional - Which dimensions to sum over. - If not speciified, then all the common dimensions are summed over. + dims: '...', str or tuple of strings, optional + Which dimensions to sum over. Ellipsis ('...') sums over all dimensions. + If not specified, then all the common dimensions are summed over. **kwargs: dict Additional keyword arguments passed to numpy.einsum or dask.array.einsum @@ -1070,7 +1070,7 @@ def dot(*arrays, dims=None, **kwargs): -------- >>> import numpy as np - >>> import xarray as xp + >>> import xarray as xr >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b']) >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), ... dims=['a', 'b', 'c']) @@ -1117,6 +1117,14 @@ def dot(*arrays, dims=None, **kwargs): [273, 446, 619]]) Dimensions without coordinates: a, d + >>> xr.dot(da_a, da_b) + + array([110, 125]) + Dimensions without coordinates: c + + >>> xr.dot(da_a, da_b, dims=...) 
+ + array(235) """ from .dataarray import DataArray from .variable import Variable @@ -1141,7 +1149,9 @@ def dot(*arrays, dims=None, **kwargs): einsum_axes = "abcdefghijklmnopqrstuvwxyz" dim_map = {d: einsum_axes[i] for i, d in enumerate(all_dims)} - if dims is None: + if dims is ...: + dims = all_dims + elif dims is None: # find dimensions that occur more than one times dim_counts = Counter() for arr in arrays: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index bcab136de8d..0d19990bdd0 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -148,10 +148,10 @@ def _calc_concat_dim_coord(dim): dim = dim_name elif not isinstance(dim, DataArray): coord = as_variable(dim).to_index_variable() - dim, = coord.dims + (dim,) = coord.dims else: coord = dim - dim, = coord.dims + (dim,) = coord.dims return dim, coord diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 206c9172429..8d1f1d878e8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes from .options import OPTIONS -from .utils import ReprObject, _check_inplace, either_dict_or_kwargs +from .utils import Default, ReprObject, _default, _check_inplace, either_dict_or_kwargs from .variable import ( IndexVariable, Variable, @@ -270,8 +270,6 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - __default = ReprObject("") - dt = property(DatetimeAccessor) def __init__( @@ -387,18 +385,18 @@ def _replace( self, variable: Variable = None, coords=None, - name: Optional[Hashable] = __default, + name: Union[Hashable, None, Default] = _default, ) -> "DataArray": if variable is None: variable = self.variable if coords is None: coords = self._coords - if name is self.__default: + if name is _default: name = self.name return type(self)(variable, coords, name=name, fastpath=True) def _replace_maybe_drop_dims( - self, variable: Variable, name: Optional[Hashable] = __default + self, variable: Variable, name: Union[Hashable, None, Default] = _default ) -> "DataArray": if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() @@ -438,7 +436,7 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Hashable = __default + self, dataset: Dataset, name: Hashable = _default ) -> "DataArray": variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables @@ -616,7 +614,7 @@ def _level_coords(self) -> Dict[Hashable, Hashable]: if var.ndim == 1 and isinstance(var, IndexVariable): level_names = var.level_names if level_names is not None: - dim, = var.dims + (dim,) = var.dims level_coords.update({lname: dim for lname in level_names}) return level_coords @@ -754,6 +752,9 @@ def reset_coords( dataset[self.name] = self.variable return dataset + def __dask_tokenize__(self): + return (type(self), self._variable, self._coords, self._name) + def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() @@ -1863,12 +1864,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra Dataset.transpose """ if dims: - if set(dims) ^ set(self.dims): - raise ValueError( - "arguments to transpose (%s) must be " - "permuted array dimensions (%s)" % (dims, tuple(self.dims)) - ) - + dims = tuple(utils.infix_dims(dims, self.dims)) variable = 
self.variable.transpose(*dims) if transpose_coords: coords: Dict[Hashable, Variable] = {} @@ -2481,13 +2477,11 @@ def identical(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - __default_name = object() - def _result_name(self, other: Any = None) -> Optional[Hashable]: # use the same naming heuristics as pandas: # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356 - other_name = getattr(other, "name", self.__default_name) - if other_name is self.__default_name or other_name == self.name: + other_name = getattr(other, "name", _default) + if other_name is _default or other_name == self.name: return self.name else: return None @@ -2773,9 +2767,9 @@ def dot( ---------- other : DataArray The other array with which the dot product is performed. - dims: hashable or sequence of hashables, optional - Along which dimensions to be summed over. Default all the common - dimensions are summed over. + dims: '...', hashable or sequence of hashables, optional + Which dimensions to sum over. Ellipsis ('...') sums over all dimensions. + If not specified, then all the common dimensions are summed over. Returns ------- @@ -3026,7 +3020,7 @@ def integrate( """ integrate the array with the trapezoidal rule. .. note:: - This feature is limited to simple cartesian geometry, i.e. coord + This feature is limited to simple cartesian geometry, i.e. dim must be one dimensional. Parameters diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1741207536f..16229946cac 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -70,8 +70,10 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .utils import ( + Default, Frozen, SortedKeysDict, + _default, _check_inplace, decode_numpy_dict_values, either_dict_or_kwargs, @@ -649,6 +651,9 @@ def load(self, **kwargs) -> "Dataset": return self + def __dask_tokenize__(self): + return (type(self), self._variables, self._coord_names, self._attrs) + def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} graphs = {k: v for k, v in graphs.items() if v is not None} @@ -856,23 +861,18 @@ def _construct_direct( obj._accessors = None return obj - __default = object() - @classmethod def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None): return cls._construct_direct(variables, coord_names, attrs=attrs) - # TODO(shoyer): renable type checking on this signature when pytype has a - # good way to handle defaulting arguments to a sentinel value: - # https://github.com/python/mypy/issues/1803 - def _replace( # type: ignore + def _replace( self, variables: Dict[Hashable, Variable] = None, coord_names: Set[Hashable] = None, dims: Dict[Any, int] = None, - attrs: Optional[Dict[Hashable, Any]] = __default, - indexes: Optional[Dict[Any, pd.Index]] = __default, - encoding: Optional[dict] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, + indexes: Union[Dict[Any, pd.Index], None, Default] = _default, + encoding: Union[dict, None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Fastpath constructor for internal use. 
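A minimal sketch of the user-facing behaviour the ``infix_dims``-based ``transpose`` change above enables (illustrative only, not part of the patch; the array and dimension names are made up):

.. code-block:: python

    import xarray as xr

    da = xr.DataArray([[[42]]], dims=("x", "y", "z"))

    # full explicit permutation, as before
    da.transpose("y", "z", "x").dims  # ('y', 'z', 'x')

    # new: '...' expands to all remaining dims, here moving 'x' to the end
    da.transpose(..., "x").dims       # ('y', 'z', 'x')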
@@ -890,12 +890,12 @@ def _replace( # type: ignore self._coord_names = coord_names if dims is not None: self._dims = dims - if attrs is not self.__default: - self._attrs = attrs - if indexes is not self.__default: - self._indexes = indexes - if encoding is not self.__default: - self._encoding = encoding + if attrs is not _default: + self._attrs = attrs # type: ignore # FIXME need mypy 0.750 + if indexes is not _default: + self._indexes = indexes # type: ignore # FIXME need mypy 0.750 + if encoding is not _default: + self._encoding = encoding # type: ignore # FIXME need mypy 0.750 obj = self else: if variables is None: @@ -904,23 +904,23 @@ def _replace( # type: ignore coord_names = self._coord_names.copy() if dims is None: dims = self._dims.copy() - if attrs is self.__default: + if attrs is _default: attrs = copy.copy(self._attrs) - if indexes is self.__default: + if indexes is _default: indexes = copy.copy(self._indexes) - if encoding is self.__default: + if encoding is _default: encoding = copy.copy(self._encoding) obj = self._construct_direct( variables, coord_names, dims, attrs, indexes, encoding ) return obj - def _replace_with_new_dims( # type: ignore + def _replace_with_new_dims( self, variables: Dict[Hashable, Variable], coord_names: set = None, - attrs: Optional[Dict[Hashable, Any]] = __default, - indexes: Dict[Hashable, pd.Index] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, + indexes: Union[Dict[Hashable, pd.Index], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Replace variables with recalculated dimensions.""" @@ -929,12 +929,12 @@ def _replace_with_new_dims( # type: ignore variables, coord_names, dims, attrs, indexes, inplace=inplace ) - def _replace_vars_and_dims( # type: ignore + def _replace_vars_and_dims( self, variables: Dict[Hashable, Variable], coord_names: set = None, dims: Dict[Hashable, int] = None, - attrs: Dict[Hashable, Any] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Deprecated version of _replace_with_new_dims(). @@ -3542,7 +3542,6 @@ def drop( # noqa: F811 ---------- labels : hashable or iterable of hashables Name(s) of variables or index labels to drop. - If dim is not None, labels can be any array-like. dim : None or hashable, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops variables rather than index labels. @@ -3712,14 +3711,14 @@ def transpose(self, *dims: Hashable) -> "Dataset": DataArray.transpose """ if dims: - if set(dims) ^ set(self.dims): + if set(dims) ^ set(self.dims) and ... 
not in dims: raise ValueError( "arguments to transpose (%s) must be " "permuted dataset dimensions (%s)" % (dims, tuple(self.dims)) ) ds = self.copy() for name, var in self._variables.items(): - var_dims = tuple(dim for dim in dims if dim in var.dims) + var_dims = tuple(dim for dim in dims if dim in (var.dims + (...,))) ds._variables[name] = var.transpose(*var_dims) return ds @@ -4091,7 +4090,7 @@ def reduce( if len(reduce_dims) == 1: # unpack dimensions for the benefit of functions # like np.argmin which can't handle tuple arguments - reduce_dims, = reduce_dims + (reduce_dims,) = reduce_dims elif len(reduce_dims) == var.ndim: # prefer to aggregate over axis=None rather than # axis=(0, 1) if they will be equivalent, because @@ -5190,7 +5189,7 @@ def integrate(self, coord, datetime_unit=None): Parameters ---------- - dim: str, or a sequence of str + coord: str, or a sequence of str Coordinate(s) used for the integration. datetime_unit Can be specify the unit if datetime coordinate is used. One of @@ -5205,6 +5204,34 @@ def integrate(self, coord, datetime_unit=None): -------- DataArray.integrate numpy.trapz: corresponding numpy function + + Examples + -------- + >>> ds = xr.Dataset( + ... data_vars={"a": ("x", [5, 5, 6, 6]), "b": ("x", [1, 2, 1, 0])}, + ... coords={"x": [0, 1, 2, 3], "y": ("x", [1, 7, 3, 5])}, + ... ) + >>> ds + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + y (x) int64 1 7 3 5 + Data variables: + a (x) int64 5 5 6 6 + b (x) int64 1 2 1 0 + >>> ds.integrate("x") + + Dimensions: () + Data variables: + a float64 16.5 + b float64 3.5 + >>> ds.integrate("y") + + Dimensions: () + Data variables: + a float64 20.0 + b float64 4.0 """ if not isinstance(coord, (list, tuple)): coord = (coord,) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 68bd28ddb12..353566eb345 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -15,6 +15,7 @@ from .utils import ( either_dict_or_kwargs, hashable, + is_scalar, maybe_wrap_array, peek_at, safe_cast_to_index, @@ -22,6 +23,18 @@ from .variable import IndexVariable, Variable, as_variable +def check_reduce_dims(reduce_dims, dimensions): + + if reduce_dims is not ...: + if is_scalar(reduce_dims): + reduce_dims = [reduce_dims] + if any([dim not in dimensions for dim in reduce_dims]): + raise ValueError( + "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." + % (reduce_dims, dimensions) + ) + + def unique_value_groups(ar, sort=True): """Group an array by its unique values. @@ -308,7 +321,7 @@ def __init__( raise ValueError("`group` must have a name") group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) - group_dim, = group.dims + (group_dim,) = group.dims expected_size = obj.sizes[group_dim] if group.size != expected_size: @@ -348,6 +361,13 @@ def __init__( group_indices = [slice(i, i + 1) for i in group_indices] unique_coord = group else: + if group.isnull().any(): + # drop any NaN valued groups. + # also drop obj values where group was NaN + # Use where instead of reindex to account for duplicate coordinate labels. 
+ obj = obj.where(group.notnull(), drop=True) + group = group.dropna(group_dim) + # look through group to find the unique values unique_values, group_indices = unique_value_groups( safe_cast_to_index(group), sort=(bins is None) @@ -450,7 +470,7 @@ def _infer_concat_args(self, applied_example): else: coord = self._unique_coord positions = None - dim, = coord.dims + (dim,) = coord.dims if isinstance(coord, _DummyGroup): coord = None return coord, dim, positions @@ -624,7 +644,7 @@ def _concat_shortcut(self, applied, dim, positions=None): def _restore_dim_order(self, stacked): def lookup_order(dimension): if dimension == self._group.name: - dimension, = self._group.dims + (dimension,) = self._group.dims if dimension in self._obj.dims: axis = self._obj.get_axis_num(dimension) else: @@ -794,15 +814,11 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not ... and dim not in self.dims: - raise ValueError( - "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (dim, self.dims) - ) - def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + check_reduce_dims(dim, self.dims) + return self.apply(reduce_array, shortcut=shortcut) @@ -895,11 +911,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim is not ... and dim not in self.dims: - raise ValueError( - "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (dim, self.dims) - ) + check_reduce_dims(dim, self.dims) return self.apply(reduce_dataset) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b9809a8d2b9..f48c9e72af1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -212,7 +212,7 @@ def get_dim_indexers(data_obj, indexers): level_indexers = defaultdict(dict) dim_indexers = {} for key, label in indexers.items(): - dim, = data_obj[key].dims + (dim,) = data_obj[key].dims if key != dim: # assume here multi-index level indexer level_indexers[dim][key] = label @@ -1368,7 +1368,7 @@ def __getitem__( if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - key, = key + (key,) = key if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional return NumpyIndexingAdapter(self.array.values)[indexer] diff --git a/xarray/core/merge.py b/xarray/core/merge.py index db5ef9531df..389ceb155f7 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -277,7 +277,7 @@ def append_all(variables, indexes): def collect_from_coordinates( - list_of_coords: "List[Coordinates]" + list_of_coords: "List[Coordinates]", ) -> Dict[Hashable, List[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {} @@ -320,7 +320,7 @@ def merge_coordinates_without_align( def determine_coords( - list_of_mappings: Iterable["DatasetLike"] + list_of_mappings: Iterable["DatasetLike"], ) -> Tuple[Set[Hashable], Set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. 
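A minimal sketch of the groupby behaviour the NaN-handling change above implements (illustrative only, not part of the patch; names are made up): elements whose group label is NaN are now dropped, together with the corresponding values, rather than forming a NaN group.

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        [1.0, 2.0, 3.0, 4.0],
        dims="x",
        coords={"label": ("x", [0.0, np.nan, 0.0, 1.0])},
    )

    # the NaN-labelled element (2.0) is excluded from the result:
    # label=0.0 -> 4.0, label=1.0 -> 4.0
    da.groupby("label").sum()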
diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 6befe0b5efc..6681375c18e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -6,10 +6,12 @@ import os.path import re import warnings +from enum import Enum from typing import ( AbstractSet, Any, Callable, + Collection, Container, Dict, Hashable, @@ -660,6 +662,30 @@ def __len__(self) -> int: return len(self._data) - num_hidden +def infix_dims(dims_supplied: Collection, dims_all: Collection) -> Iterator: + """ + Resolves a supplied list containing an ellipsis representing other items, to + a generator with the 'realized' list of all items + """ + if ... in dims_supplied: + if len(set(dims_all)) != len(dims_all): + raise ValueError("Cannot use ellipsis with repeated dims") + if len([d for d in dims_supplied if d == ...]) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = [d for d in dims_all if d not in dims_supplied] + for d in dims_supplied: + if d == ...: + yield from other_dims + else: + yield d + else: + if set(dims_supplied) ^ set(dims_all): + raise ValueError( + f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" + ) + yield from dims_supplied + + def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: """ Get an new dimension name based on new_dim, that is not used in dims. If the same name exists, we add an underscore(s) in the head. @@ -676,3 +702,11 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: while new_dim in dims: new_dim = "_" + str(new_dim) return new_dim + + +# Singleton type, as per https://github.com/python/typing/pull/240 +class Default(Enum): + token = 0 + + +_default = Default.token diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 93ad1eafb97..117ab85ae65 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -25,6 +25,7 @@ OrderedSet, decode_numpy_dict_values, either_dict_or_kwargs, + infix_dims, ensure_us_time_resolution, ) @@ -389,6 +390,11 @@ def compute(self, **kwargs): new = self.copy(deep=False) return new.load(**kwargs) + def __dask_tokenize__(self): + # Use v.data, instead of v._data, in order to cope with the wrappers + # around NetCDF and the like + return type(self), self._dims, self.data, self._attrs + def __dask_graph__(self): if isinstance(self._data, dask_array_type): return self._data.__dask_graph__() @@ -1228,6 +1234,7 @@ def transpose(self, *dims) -> "Variable": """ if len(dims) == 0: dims = self.dims[::-1] + dims = tuple(infix_dims(dims, self.dims)) axes = self.get_axis_num(dims) if len(dims) < 2: # no need to transpose if only one dimension return self.copy(deep=False) @@ -1524,7 +1531,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): along the given dimension. """ if not isinstance(dim, str): - dim, = dim.dims + (dim,) = dim.dims # can't do this lazily: we need to loop through variables at least # twice @@ -1961,6 +1968,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): if not isinstance(self._data, PandasIndexAdapter): self._data = PandasIndexAdapter(self._data) + def __dask_tokenize__(self): + # Don't waste time converting pd.Index to np.ndarray + return (type(self), self._dims, self._data.array, self._attrs) + def load(self): # data is already loaded into memory for IndexVariable return self @@ -1994,7 +2005,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): arrays, if possible.
""" if not isinstance(dim, str): - dim, = dim.dims + (dim,) = dim.dims variables = list(variables) first_var = variables[0] diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index a288f195e32..ca68f617144 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -83,8 +83,8 @@ def _infer_line_data(darray, x, y, hue): ) else: - xdim, = darray[xname].dims - huedim, = darray[huename].dims + (xdim,) = darray[xname].dims + (huedim,) = darray[huename].dims yplt = darray.transpose(xdim, huedim) else: @@ -102,8 +102,8 @@ def _infer_line_data(darray, x, y, hue): ) else: - ydim, = darray[yname].dims - huedim, = darray[huename].dims + (ydim,) = darray[yname].dims + (huedim,) = darray[huename].dims xplt = darray.transpose(ydim, huedim) huelabel = label_from_attrs(darray[huename]) diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index 536b8ab6103..7e382de3b5b 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -2,6 +2,17 @@ * */ +:root { + --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1)); + --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54)); + --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38)); + --xr-border-color: var(--jp-border-color2, #e0e0e0); + --xr-disabled-color: var(--jp-layout-color3, #bdbdbd); + --xr-background-color: var(--jp-layout-color0, white); + --xr-background-color-row-even: var(--jp-layout-color1, white); + --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); +} + .xr-wrap { min-width: 300px; max-width: 700px; @@ -11,7 +22,7 @@ padding-top: 6px; padding-bottom: 6px; margin-bottom: 4px; - border-bottom: solid 1px #ddd; + border-bottom: solid 1px var(--xr-border-color); } .xr-header > div, @@ -28,11 +39,7 @@ } .xr-obj-type { - color: #555; -} - -.xr-array-name { - color: #000; + color: var(--xr-font-color2); } .xr-sections { @@ -50,21 +57,21 @@ } .xr-section-item input + label { - color: #ccc; + color: var(--xr-disabled-color); } .xr-section-item input:enabled + label { cursor: pointer; - color: #555; + color: var(--xr-font-color2); } .xr-section-item input:enabled + label:hover { - color: #000; + color: var(--xr-font-color0); } .xr-section-summary { grid-column: 1; - color: #555; + color: var(--xr-font-color2); font-weight: 500; } @@ -74,7 +81,7 @@ } .xr-section-summary-in:disabled + label { - color: #555; + color: var(--xr-font-color2); } .xr-section-summary-in + label:before { @@ -86,7 +93,7 @@ } .xr-section-summary-in:disabled + label:before { - color: #ccc; + color: var(--xr-disabled-color); } .xr-section-summary-in:checked + label:before { @@ -129,7 +136,7 @@ } .xr-preview { - color: #888; + color: var(--xr-font-color3); } .xr-array-preview, @@ -186,7 +193,7 @@ .xr-var-item > div, .xr-var-item label, .xr-var-item > .xr-var-name span { - background-color: #fcfcfc; + background-color: var(--xr-background-color-row-even); margin-bottom: 0; } @@ -197,7 +204,7 @@ .xr-var-list > li:nth-child(odd) > div, .xr-var-list > li:nth-child(odd) > label, .xr-var-list > li:nth-child(odd) > .xr-var-name span { - background-color: #efefef; + background-color: var(--xr-background-color-row-odd); } .xr-var-name { @@ -211,7 +218,7 @@ .xr-var-dtype { grid-column: 3; text-align: right; - color: #555; + color: var(--xr-font-color2); } .xr-var-preview { @@ -241,7 +248,7 @@ .xr-var-attrs, .xr-var-data { display: none; - background-color: #fff !important; + background-color: var(--xr-background-color) !important; padding-bottom: 5px !important; } @@ -288,7 +295,7 @@ dl.xr-attrs { 
.xr-attrs dt:hover span { display: inline-block; - background: #fff; + background: var(--xr-background-color); padding-right: 10px; } diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 88476e5e730..6592360cdf2 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -78,10 +78,6 @@ def LooseVersion(vstring): requires_scipy_or_netCDF4 = pytest.mark.skipif( not has_scipy_or_netCDF4, reason="requires scipy or netCDF4" ) -has_cftime_or_netCDF4 = has_cftime or has_netCDF4 -requires_cftime_or_netCDF4 = pytest.mark.skipif( - not has_cftime_or_netCDF4, reason="requires cftime or netCDF4" -) try: import_seaborn() has_seaborn = True @@ -158,18 +154,21 @@ def source_ndarray(array): def assert_equal(a, b): + __tracebackhide__ = True xarray.testing.assert_equal(a, b) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) def assert_identical(a, b): + __tracebackhide__ = True xarray.testing.assert_identical(a, b) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) def assert_allclose(a, b, **kwargs): + __tracebackhide__ = True xarray.testing.assert_allclose(a, b, **kwargs) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 0058747db71..5fe5b8c3f59 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,10 +7,8 @@ from . import ( assert_array_equal, assert_equal, - has_cftime, - has_cftime_or_netCDF4, - has_dask, raises_regex, + requires_cftime, requires_dask, ) @@ -199,7 +197,7 @@ def times_3d(times): ) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -217,7 +215,7 @@ def test_field_access(data, field): assert_equal(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftime_strftime_access(data): """ compare cftime formatting against datetime formatting """ date_format = "%Y%m%d%H" @@ -232,8 +230,8 @@ def test_cftime_strftime_access(data): assert_equal(result, expected) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -254,8 +252,8 @@ def test_dask_field_access_1d(data, field): assert_equal(result.compute(), expected) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -286,7 +284,7 @@ def cftime_date_type(calendar): return _all_cftime_date_types()[calendar] -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_seasons(cftime_date_type): dates = np.array([cftime_date_type(2000, month, 15) for month in range(1, 13)]) dates = xr.DataArray(dates) @@ -307,15 +305,3 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) - - -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime or netCDF4 not installed") -def test_dt_accessor_error_netCDF4(cftime_date_type): - da = xr.DataArray( - [cftime_date_type(1, 
1, 1), cftime_date_type(2, 1, 1)], dims=["time"] - ) - if not has_cftime: - with pytest.raises(TypeError): - da.dt.month - else: - da.dt.month diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 142769dbbe7..343e059f53c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1187,5 +1187,5 @@ def test_dayofyear_after_cftime_range(freq): def test_cftime_range_standard_calendar_refers_to_gregorian(): from cftime import DatetimeGregorian - result, = cftime_range("2000", periods=1) + (result,) = cftime_range("2000", periods=1) assert isinstance(result, DatetimeGregorian) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index e49dc72abdd..a8ee3c97042 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -15,7 +15,7 @@ ) from xarray.tests import assert_array_equal, assert_identical -from . import has_cftime, has_cftime_or_netCDF4, raises_regex, requires_cftime +from . import raises_regex, requires_cftime from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, @@ -653,7 +653,7 @@ def test_indexing_in_dataframe_iloc(df, index): assert result.equals(expected) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_concat_cftimeindex(date_type): da1 = xr.DataArray( [1.0, 2.0], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], dims=["time"] @@ -663,11 +663,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - if has_cftime: - assert isinstance(da.indexes["time"], CFTimeIndex) - else: - assert isinstance(da.indexes["time"], pd.Index) - assert not isinstance(da.indexes["time"], CFTimeIndex) + assert isinstance(da.indexes["time"], CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 021d76e2b11..d012fb36c35 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,6 @@ from xarray import DataArray, Dataset, Variable, coding, decode_cf from xarray.coding.times import ( - _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime, @@ -19,15 +18,7 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal -from . import ( - arm_xfail, - assert_array_equal, - has_cftime, - has_cftime_or_netCDF4, - has_dask, - requires_cftime, - requires_cftime_or_netCDF4, -) +from . 
import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask _NON_STANDARD_CALENDARS_SET = { "noleap", @@ -79,10 +70,8 @@ def _all_cftime_date_types(): - try: - import cftime - except ImportError: - import netcdftime as cftime + import cftime + return { "noleap": cftime.DatetimeNoLeap, "365_day": cftime.DatetimeNoLeap, @@ -95,16 +84,14 @@ def _all_cftime_date_types(): } -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) def test_cf_datetime(num_dates, units, calendar): - cftime = _import_cftime() - if cftime.__name__ == "cftime": - expected = cftime.num2date( - num_dates, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(num_dates, units, calendar) + import cftime + + expected = cftime.num2date( + num_dates, units, calendar, only_use_cftime_datetimes=True + ) min_y = np.ravel(np.atleast_1d(expected))[np.nanargmin(num_dates)].year max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)].year if min_y >= 1678 and max_y < 2262: @@ -138,15 +125,12 @@ def test_cf_datetime(num_dates, units, calendar): assert_array_equal(num_dates, np.around(encoded, 1)) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decode_cf_datetime_overflow(): # checks for # https://github.com/pydata/pandas/issues/14068 # https://github.com/pydata/xarray/issues/975 - try: - from cftime import DatetimeGregorian - except ImportError: - from netcdftime import DatetimeGregorian + from cftime import DatetimeGregorian datetime = DatetimeGregorian units = "days since 2000-01-01 00:00:00" @@ -171,7 +155,7 @@ def test_decode_cf_datetime_non_standard_units(): assert_array_equal(actual, expected) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decode_cf_datetime_non_iso_strings(): # datetime strings that are _almost_ ISO compliant but not quite, # but which cftime.num2date can still parse correctly @@ -190,10 +174,10 @@ def test_decode_cf_datetime_non_iso_strings(): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H") @@ -210,21 +194,18 @@ def test_decode_standard_calendar_inside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H") non_standard_time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(non_standard_time, units, calendar=calendar) - + expected = cftime.num2date( + non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True + ) expected_dtype = np.dtype("O") actual = coding.times.decode_cf_datetime( @@ -238,24 +219,19 @@ def 
test_decode_non_standard_calendar_inside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_dates_outside_timestamp_range(calendar): + import cftime from datetime import datetime - cftime = _import_cftime() - units = "days since 0001-01-01" times = [datetime(1, 4, 1, h) for h in range(1, 5)] time = cftime.date2num(times, units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - time, units, calendar=calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(time, units, calendar=calendar) - + expected = cftime.num2date( + time, units, calendar=calendar, only_use_cftime_datetimes=True + ) expected_date_type = type(expected[0]) with warnings.catch_warnings(): @@ -269,7 +245,7 @@ def test_decode_dates_outside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_single_element_inside_timestamp_range(calendar): units = "days since 0001-01-01" @@ -280,7 +256,7 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range(calendar assert actual.dtype == np.dtype("M8[ns]") -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_single_element_inside_timestamp_range(calendar): units = "days since 0001-01-01" @@ -291,10 +267,11 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range(cale assert actual.dtype == np.dtype("O") -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_single_element_outside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" for days in [1, 1470376]: for num_time in [days, [days], [[days]]]: @@ -304,20 +281,16 @@ def test_decode_single_element_outside_timestamp_range(calendar): num_time, units, calendar=calendar ) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - days, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(days, units, calendar) - + expected = cftime.num2date( + days, units, calendar, only_use_cftime_datetimes=True + ) assert isinstance(actual.item(), type(expected)) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_multidim_time_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") @@ -343,10 +316,10 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(calendar) assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times1 = 
pd.date_range("2001-04-01", end="2001-04-05", freq="D") @@ -382,13 +355,12 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calend assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_multidim_time_outside_timestamp_range(calendar): + import cftime from datetime import datetime - cftime = _import_cftime() - units = "days since 0001-01-01" times1 = [datetime(1, 4, day) for day in range(1, 6)] times2 = [datetime(1, 5, day) for day in range(1, 6)] @@ -398,16 +370,8 @@ def test_decode_multidim_time_outside_timestamp_range(calendar): mdim_time[:, 0] = time1 mdim_time[:, 1] = time2 - if cftime.__name__ == "cftime": - expected1 = cftime.num2date( - time1, units, calendar, only_use_cftime_datetimes=True - ) - expected2 = cftime.num2date( - time2, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected1 = cftime.num2date(time1, units, calendar) - expected2 = cftime.num2date(time2, units, calendar) + expected1 = cftime.num2date(time1, units, calendar, only_use_cftime_datetimes=True) + expected2 = cftime.num2date(time2, units, calendar, only_use_cftime_datetimes=True) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") @@ -424,46 +388,38 @@ def test_decode_multidim_time_outside_timestamp_range(calendar): assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", ["360_day", "all_leap", "366_day"]) def test_decode_non_standard_calendar_single_element(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" - try: - dt = cftime.netcdftime.datetime(2001, 2, 29) - except AttributeError: - # Must be using the standalone cftime library - dt = cftime.datetime(2001, 2, 29) + dt = cftime.datetime(2001, 2, 29) num_time = cftime.date2num(dt, units, calendar) actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = np.asarray( - cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) - ) - else: - expected = np.asarray(cftime.num2date(num_time, units, calendar)) + expected = np.asarray( + cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) + ) assert actual.dtype == np.dtype("O") assert expected == actual -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_decode_360_day_calendar(): - cftime = _import_cftime() + import cftime + calendar = "360_day" # ensure leap year doesn't matter for year in [2010, 2011, 2012, 2013, 2014]: units = f"days since {year}-01-01" num_times = np.arange(100) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - num_times, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(num_times, units, calendar) + expected = cftime.num2date( + num_times, units, calendar, only_use_cftime_datetimes=True + ) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -477,7 +433,7 @@ def test_decode_360_day_calendar(): @arm_xfail -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ["num_dates", "units", "expected_list"], [ @@ -499,7 +455,7 @@ def test_cf_datetime_nan(num_dates, units, 
expected_list): assert_array_equal(expected, actual) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decoded_cf_datetime_array_2d(): # regression test for GH1229 variable = Variable( @@ -548,7 +504,7 @@ def test_infer_datetime_units(dates, expected): ] -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "calendar", _NON_STANDARD_CALENDARS + ["gregorian", "proleptic_gregorian"] ) @@ -622,7 +578,7 @@ def test_infer_timedelta_units(deltas, expected): assert expected == coding.times.infer_timedelta_units(deltas) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ["date_args", "expected"], [ @@ -649,7 +605,7 @@ def test_decode_cf(calendar): ds[v].attrs["units"] = "days since 2001-01-01" ds[v].attrs["calendar"] = calendar - if not has_cftime_or_netCDF4 and calendar not in _STANDARD_CALENDARS: + if not has_cftime and calendar not in _STANDARD_CALENDARS: with pytest.raises(ValueError): ds = decode_cf(ds) else: @@ -703,7 +659,7 @@ def test_decode_cf_time_bounds(): _update_bounds_attributes(ds.variables) -@requires_cftime_or_netCDF4 +@requires_cftime def test_encode_time_bounds(): time = pd.date_range("2000-01-16", periods=1) @@ -749,7 +705,7 @@ def calendar(request): @pytest.fixture() def times(calendar): - cftime = _import_cftime() + import cftime return cftime.num2date( np.arange(4), @@ -779,24 +735,24 @@ def times_3d(times): ) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains_cftime_datetimes_1d(data): assert contains_cftime_datetimes(data.time) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask def test_contains_cftime_datetimes_dask_1d(data): assert contains_cftime_datetimes(data.time.chunk()) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains_cftime_datetimes_3d(times_3d): assert contains_cftime_datetimes(times_3d) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask def test_contains_cftime_datetimes_dask_3d(times_3d): assert contains_cftime_datetimes(times_3d.chunk()) @@ -806,13 +762,13 @@ def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data) -@pytest.mark.skipif(not has_dask, reason="dask not installed") +@requires_dask @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data.chunk()) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("shape", [(24,), (8, 3), (2, 4, 3)]) def test_encode_cf_datetime_overflow(shape): # Test for fix to GH 2272 @@ -837,7 +793,7 @@ def test_encode_cf_datetime_pandas_min(): assert calendar == expected_calendar -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_time_units_with_timezone_roundtrip(calendar): # Regression test for GH 2649 expected_units = "days since 2000-01-01T00:00:00-05:00" diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 383427b479b..1f2634cc9b0 100644 --- a/xarray/tests/test_computation.py +++ 
b/xarray/tests/test_computation.py @@ -998,6 +998,23 @@ def test_dot(use_dask): assert actual.dims == ("b",) assert (actual.data == np.zeros(actual.shape)).all() + # Ellipsis (...) sums over all dimensions + actual = xr.dot(da_a, da_b, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij,ijk->", a, b)).all() + + actual = xr.dot(da_a, da_b, da_c, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij,ijk,kl-> ", a, b, c)).all() + + actual = xr.dot(da_a, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij-> ", a)).all() + + actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims=...) + assert actual.dims == () + assert (actual.data == np.zeros(actual.shape)).all() + # Invalid cases if not use_dask: with pytest.raises(TypeError): diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 42b2a679347..09002e252b4 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -21,7 +21,7 @@ from . import ( assert_array_equal, raises_regex, - requires_cftime_or_netCDF4, + requires_cftime, requires_dask, requires_netCDF4, ) @@ -81,7 +81,7 @@ def test_decode_cf_with_conflicting_fill_missing_value(): assert_identical(actual, expected) -@requires_cftime_or_netCDF4 +@requires_cftime class TestEncodeCFVariable: def test_incompatible_attributes(self): invalid_vars = [ @@ -144,7 +144,7 @@ def test_string_object_warning(self): assert_identical(original, encoded) -@requires_cftime_or_netCDF4 +@requires_cftime class TestDecodeCF: def test_dataset(self): original = Dataset( @@ -226,7 +226,7 @@ def test_invalid_time_units_raises_eagerly(self): with raises_regex(ValueError, "unable to decode time"): decode_cf(ds) - @requires_cftime_or_netCDF4 + @requires_cftime def test_dataset_repr_with_netcdf4_datetimes(self): # regression test for #347 attrs = {"units": "days since 0001-01-01", "calendar": "noleap"} @@ -239,7 +239,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self): ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)})) assert "(time) datetime64[ns]" in repr(ds) - @requires_cftime_or_netCDF4 + @requires_cftime def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 50517ae3c9c..c4323d1d317 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1,5 +1,6 @@ import operator import pickle +import sys from contextlib import suppress from distutils.version import LooseVersion from textwrap import dedent @@ -21,12 +22,16 @@ assert_frame_equal, assert_identical, raises_regex, + requires_scipy_or_netCDF4, ) +from .test_backends import create_tmp_file dask = pytest.importorskip("dask") da = pytest.importorskip("dask.array") dd = pytest.importorskip("dask.dataframe") +ON_WINDOWS = sys.platform == "win32" + class CountingScheduler: """ Simple dask scheduler counting the number of computes. 
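The test_dask.py hunk below adds tokenization tests for the new __dask_tokenize__ methods introduced in xarray/core/variable.py above. A minimal sketch of the property those tests assert, assuming this patch is applied and dask is installed:

import dask.base
import xarray as xr

a = xr.DataArray([1, 2, 3], dims="x")
# Equal objects produce equal tokens; changing the underlying data changes the token.
assert dask.base.tokenize(a) == dask.base.tokenize(a.copy(deep=True))
assert dask.base.tokenize(a) != dask.base.tokenize(a + 1)
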
@@ -1135,3 +1140,92 @@ def test_make_meta(map_ds): for variable in map_ds.data_vars: assert variable in meta.data_vars assert meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim + + +@pytest.mark.parametrize( + "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] +) +@pytest.mark.parametrize( + "transform", + [ + lambda x: x.reset_coords(), + lambda x: x.reset_coords(drop=True), + lambda x: x.isel(x=1), + lambda x: x.attrs.update(new_attrs=1), + lambda x: x.assign_coords(cxy=1), + lambda x: x.rename({"x": "xnew"}), + lambda x: x.rename({"cxy": "cxynew"}), + ], +) +def test_token_changes_on_transform(obj, transform): + with raise_if_dask_computes(): + assert dask.base.tokenize(obj) != dask.base.tokenize(transform(obj)) + + +@pytest.mark.parametrize( + "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] +) +def test_token_changes_when_data_changes(obj): + with raise_if_dask_computes(): + t1 = dask.base.tokenize(obj) + + # Change data_var + if isinstance(obj, DataArray): + obj *= 2 + else: + obj["a"] *= 2 + with raise_if_dask_computes(): + t2 = dask.base.tokenize(obj) + assert t2 != t1 + + # Change non-index coord + obj.coords["ndcoord"] *= 2 + with raise_if_dask_computes(): + t3 = dask.base.tokenize(obj) + assert t3 != t2 + + # Change IndexVariable + obj.coords["x"] *= 2 + with raise_if_dask_computes(): + t4 = dask.base.tokenize(obj) + assert t4 != t3 + + +@pytest.mark.parametrize("obj", [make_da().compute(), make_ds().compute()]) +def test_token_changes_when_buffer_changes(obj): + with raise_if_dask_computes(): + t1 = dask.base.tokenize(obj) + + if isinstance(obj, DataArray): + obj[0, 0] = 123 + else: + obj["a"][0, 0] = 123 + with raise_if_dask_computes(): + t2 = dask.base.tokenize(obj) + assert t2 != t1 + + obj.coords["ndcoord"][0] = 123 + with raise_if_dask_computes(): + t3 = dask.base.tokenize(obj) + assert t3 != t2 + + +@pytest.mark.parametrize( + "transform", + [lambda x: x, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], +) +@pytest.mark.parametrize("obj", [make_da(), make_ds(), make_ds().variables["a"]]) +def test_token_identical(obj, transform): + with raise_if_dask_computes(): + assert dask.base.tokenize(obj) == dask.base.tokenize(transform(obj)) + assert dask.base.tokenize(obj.compute()) == dask.base.tokenize( + transform(obj.compute()) + ) + + +@requires_scipy_or_netCDF4 +def test_normalize_token_with_backend(map_ds): + with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: + map_ds.to_netcdf(tmp_file) + read = xr.open_dataset(tmp_file) + assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b13527bc098..5114d13b0dc 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2068,6 +2068,10 @@ def test_transpose(self): ) assert_equal(expected, actual) + # same as previous but with ellipsis + actual = da.transpose("z", ..., "x", transpose_coords=True) + assert_equal(expected, actual) + with pytest.raises(ValueError): da.transpose("x", "y") @@ -2560,15 +2564,6 @@ def change_metadata(x): expected = change_metadata(expected) assert_equal(expected, actual) - def test_groupby_reduce_dimension_error(self): - array = self.make_groupby_example_array() - grouped = array.groupby("y") - with raises_regex(ValueError, "cannot reduce over dimension 'y'"): - grouped.mean() - - grouped = array.groupby("y", squeeze=False) - assert_identical(array, grouped.mean()) - def test_groupby_math(self): 
array = self.make_groupby_example_array() for squeeze in [True, False]: @@ -3130,11 +3125,11 @@ def test_align_copy(self): # Trivial align - 1 element x = DataArray([1, 2, 3], coords=[("a", [1, 2, 3])]) - x2, = align(x, copy=False) + (x2,) = align(x, copy=False) assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) - x2, = align(x, copy=True) + (x2,) = align(x, copy=True) assert_identical(x, x2) assert source_ndarray(x2.data) is not source_ndarray(x.data) @@ -3219,7 +3214,7 @@ def test_align_indexes(self): assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) - x2, = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]}) + (x2,) = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]}) expected_x2 = DataArray([3, np.nan, 2, 1], coords=[("a", [-2, 7, 10, -1])]) assert_identical(expected_x2, x2) @@ -3298,7 +3293,7 @@ def test_broadcast_arrays_nocopy(self): assert source_ndarray(x2.data) is source_ndarray(x.data) # single-element broadcast (trivial case) - x2, = broadcast(x) + (x2,) = broadcast(x) assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) @@ -3930,6 +3925,16 @@ def test_dot(self): expected = DataArray(expected_vals, coords=[x, j], dims=["x", "j"]) assert_equal(expected, actual) + # Ellipsis: all dims are shared + actual = da.dot(da, dims=...) + expected = da.dot(da) + assert_equal(expected, actual) + + # Ellipsis: not all dims are shared + actual = da.dot(dm, dims=...) + expected = da.dot(dm, dims=("j", "x", "y", "z")) + assert_equal(expected, actual) + with pytest.raises(NotImplementedError): da.dot(dm.to_dataset(name="dm")) with pytest.raises(TypeError): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b3ffdf68e3f..eab6040e17e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1945,7 +1945,7 @@ def test_align_nocopy(self): def test_align_indexes(self): x = Dataset({"foo": DataArray([1, 2, 3], dims="x", coords=[("x", [1, 2, 3])])}) - x2, = align(x, indexes={"x": [2, 3, 1]}) + (x2,) = align(x, indexes={"x": [2, 3, 1]}) expected_x2 = Dataset( {"foo": DataArray([2, 3, 1], dims="x", coords={"x": [2, 3, 1]})} ) @@ -1973,7 +1973,7 @@ def test_broadcast(self): }, {"c": ("x", [4])}, ) - actual, = broadcast(ds) + (actual,) = broadcast(ds) assert_identical(expected, actual) ds_x = Dataset({"foo": ("x", [1])}) @@ -1995,7 +1995,7 @@ def test_broadcast_nocopy(self): x = Dataset({"foo": (("x", "y"), [[1, 1]])}) y = Dataset({"bar": ("y", [2, 3])}) - actual_x, = broadcast(x) + (actual_x,) = broadcast(x) assert_identical(x, actual_x) assert source_ndarray(actual_x["foo"].data) is source_ndarray(x["foo"].data) @@ -2117,25 +2117,31 @@ def test_drop_variables(self): def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) - actual = data.drop(["a"], "x") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a"], "x") expected = data.isel(x=[1]) assert_identical(expected, actual) - actual = data.drop(["a", "b"], "x") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a", "b"], "x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) with pytest.raises(KeyError): # not contained in axis - data.drop(["c"], dim="x") + with pytest.warns(DeprecationWarning): + data.drop(["c"], dim="x") - actual = data.drop(["c"], dim="x", errors="ignore") + with pytest.warns(DeprecationWarning): + actual = data.drop(["c"], dim="x", errors="ignore") assert_identical(data, actual) with 
pytest.raises(ValueError): - data.drop(["c"], dim="x", errors="wrong_value") + with pytest.warns(DeprecationWarning): + data.drop(["c"], dim="x", errors="wrong_value") - actual = data.drop(["a", "b", "c"], "x", errors="ignore") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a", "b", "c"], "x", errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) @@ -4675,6 +4681,10 @@ def test_dataset_transpose(self): ) assert_identical(expected, actual) + actual = ds.transpose(...) + expected = ds + assert_identical(expected, actual) + actual = ds.transpose("x", "y") expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) @@ -4690,13 +4700,32 @@ def test_dataset_transpose(self): expected_dims = tuple(d for d in new_order if d in ds[k].dims) assert actual[k].dims == expected_dims - with raises_regex(ValueError, "arguments to transpose"): + # same as above but with ellipsis + new_order = ("dim2", "dim3", "dim1", "time") + actual = ds.transpose("dim2", "dim3", ...) + for k in ds.variables: + expected_dims = tuple(d for d in new_order if d in ds[k].dims) + assert actual[k].dims == expected_dims + + with raises_regex(ValueError, "permuted"): ds.transpose("dim1", "dim2", "dim3") - with raises_regex(ValueError, "arguments to transpose"): + with raises_regex(ValueError, "permuted"): ds.transpose("dim1", "dim2", "dim3", "time", "extra_dim") assert "T" not in dir(ds) + def test_dataset_ellipsis_transpose_different_ordered_vars(self): + # https://github.com/pydata/xarray/issues/1081#issuecomment-544350457 + ds = Dataset( + dict( + a=(("w", "x", "y", "z"), np.ones((2, 3, 4, 5))), + b=(("x", "w", "y", "z"), np.zeros((3, 2, 4, 5))), + ) + ) + result = ds.transpose(..., "z", "y") + assert list(result["a"].dims) == list("wxzy") + assert list(result["b"].dims) == list("xwzy") + def test_dataset_retains_period_index_on_transpose(self): ds = create_test_data() diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index eb073a14aae..9df2f167cf2 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -440,7 +440,9 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): **{aggdim: getattr(da, "arg" + func)(dim=aggdim, skipna=skipna).compute()} ) expected = getattr(da, func)(dim=aggdim, skipna=skipna) - assert_allclose(actual.drop(actual.coords), expected.drop(expected.coords)) + assert_allclose( + actual.drop(list(actual.coords)), expected.drop(list(expected.coords)) + ) def test_argmin_max_error(): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index a6de41beb66..e2216547ac8 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -5,7 +5,23 @@ import xarray as xr from xarray.core.groupby import _consolidate_slices -from . import assert_identical, raises_regex +from . 
import assert_allclose, assert_equal, assert_identical, raises_regex + + +@pytest.fixture +def dataset(): + ds = xr.Dataset( + {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, + {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, + ) + ds["boo"] = (("z", "y"), [["f", "g", "h", "j"]] * 2) + + return ds + + +@pytest.fixture +def array(dataset): + return dataset["foo"] def test_consolidate_slices(): @@ -21,33 +37,25 @@ def test_consolidate_slices(): _consolidate_slices([slice(3), 4]) -def test_groupby_dims_property(): - ds = xr.Dataset( - {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, - {"x": ["a", "bcd", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, - ) +def test_groupby_dims_property(dataset): + assert dataset.groupby("x").dims == dataset.isel(x=1).dims + assert dataset.groupby("y").dims == dataset.isel(y=1).dims - assert ds.groupby("x").dims == ds.isel(x=1).dims - assert ds.groupby("y").dims == ds.isel(y=1).dims - - stacked = ds.stack({"xy": ("x", "y")}) + stacked = dataset.stack({"xy": ("x", "y")}) assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims -def test_multi_index_groupby_apply(): +def test_multi_index_groupby_apply(dataset): # regression test for GH873 - ds = xr.Dataset( - {"foo": (("x", "y"), np.random.randn(3, 4))}, - {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]}, - ) - doubled = 2 * ds - group_doubled = ( + ds = dataset.isel(z=1, drop=True)[["foo"]] + expected = 2 * ds + actual = ( ds.stack(space=["x", "y"]) .groupby("space") .apply(lambda x: 2 * x) .unstack("space") ) - assert doubled.equals(group_doubled) + assert_equal(expected, actual) def test_multi_index_groupby_sum(): @@ -58,7 +66,7 @@ def test_multi_index_groupby_sum(): ) expected = ds.sum("z") actual = ds.stack(space=["x", "y"]).groupby("space").sum("z").unstack("space") - assert expected.equals(actual) + assert_equal(expected, actual) def test_groupby_da_datetime(): @@ -78,7 +86,7 @@ def test_groupby_da_datetime(): expected = xr.DataArray( [3, 7], coords=dict(reference_date=reference_dates), dims="reference_date" ) - assert actual.equals(expected) + assert_equal(expected, actual) def test_groupby_duplicate_coordinate_labels(): @@ -86,7 +94,7 @@ def test_groupby_duplicate_coordinate_labels(): array = xr.DataArray([1, 2, 3], [("x", [1, 1, 2])]) expected = xr.DataArray([3, 3], [("x", [1, 2])]) actual = array.groupby("x").sum() - assert expected.equals(actual) + assert_equal(expected, actual) def test_groupby_input_mutation(): @@ -255,6 +263,72 @@ def test_groupby_repr_datetime(obj): assert actual == expected +def test_groupby_drops_nans(): + # GH2383 + # nan in 2D data variable (requires stacking) + ds = xr.Dataset( + { + "variable": (("lat", "lon", "time"), np.arange(60.0).reshape((4, 3, 5))), + "id": (("lat", "lon"), np.arange(12.0).reshape((4, 3))), + }, + coords={"lat": np.arange(4), "lon": np.arange(3), "time": np.arange(5)}, + ) + + ds["id"].values[0, 0] = np.nan + ds["id"].values[3, 0] = np.nan + ds["id"].values[-1, -1] = np.nan + + grouped = ds.groupby(ds.id) + + # non reduction operation + expected = ds.copy() + expected.variable.values[0, 0, :] = np.nan + expected.variable.values[-1, -1, :] = np.nan + expected.variable.values[3, 0, :] = np.nan + actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims) + assert_identical(actual, expected) + + # reduction along grouped dimension + actual = grouped.mean() + stacked = ds.stack({"xy": ["lat", "lon"]}) + expected = ( + stacked.variable.where(stacked.id.notnull()).rename({"xy": "id"}).to_dataset() + ) + expected["id"] = stacked.id.values + 
assert_identical(actual, expected.dropna("id").transpose(*actual.dims)) + + # reduction operation along a different dimension + actual = grouped.mean("time") + expected = ds.mean("time").where(ds.id.notnull()) + assert_identical(actual, expected) + + # NaN in non-dimensional coordinate + array = xr.DataArray([1, 2, 3], [("x", [1, 2, 3])]) + array["x1"] = ("x", [1, 1, np.nan]) + expected = xr.DataArray(3, [("x1", [1])]) + actual = array.groupby("x1").sum() + assert_equal(expected, actual) + + # NaT in non-dimensional coordinate + array["t"] = ( + "x", + [ + np.datetime64("2001-01-01"), + np.datetime64("2001-01-01"), + np.datetime64("NaT"), + ], + ) + expected = xr.DataArray(3, [("t", [np.datetime64("2001-01-01")])]) + actual = array.groupby("t").sum() + assert_equal(expected, actual) + + # test for repeated coordinate labels + array = xr.DataArray([0, 1, 2, 4, 3, 4], [("x", [np.nan, 1, 1, np.nan, 2, np.nan])]) + expected = xr.DataArray([3, 3], [("x", [1, 2])]) + actual = array.groupby("x").sum() + assert_equal(expected, actual) + + def test_groupby_grouping_errors(): dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) with raises_regex(ValueError, "None of the data falls within bins with edges"): @@ -276,6 +350,24 @@ def test_groupby_grouping_errors(): dataset.to_array().groupby(dataset.foo * np.nan) +def test_groupby_reduce_dimension_error(array): + grouped = array.groupby("y") + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean() + + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean("huh") + + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean(("x", "y", "asd")) + + grouped = array.groupby("y", squeeze=False) + assert_identical(array, grouped.mean()) + + assert_identical(array.mean("x"), grouped.reduce(np.mean, "x")) + assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) + + def test_groupby_bins_timeseries(): ds = xr.Dataset() ds["time"] = xr.DataArray( diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 73c4b9b8c74..8e2d4b8e064 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -11,7 +11,7 @@ from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.core.pycompat import sparse_array_type -from . import assert_equal, assert_identical +from . import assert_equal, assert_identical, requires_dask param = pytest.param xfail = pytest.mark.xfail @@ -849,3 +849,23 @@ def test_chunk(): dsc = ds.chunk(2) assert dsc.chunks == {"dim_0": (2, 2)} assert_identical(dsc, ds) + + +@requires_dask +def test_dask_token(): + import dask + + s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + a = DataArray(s) + t1 = dask.base.tokenize(a) + t2 = dask.base.tokenize(a) + t3 = dask.base.tokenize(a + 1) + assert t1 == t2 + assert t3 != t2 + assert isinstance(a.data, sparse.COO) + + ac = a.chunk(2) + t4 = dask.base.tokenize(ac) + t5 = dask.base.tokenize(ac + 1) + assert t4 != t5 + assert isinstance(ac.data._meta, sparse.COO) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index c36e8a1775d..af87b94393d 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -9,7 +9,7 @@ from xarray.core import duck_array_ops, utils from xarray.core.utils import either_dict_or_kwargs -from . import assert_array_equal, has_cftime, has_cftime_or_netCDF4, requires_dask +from . 
import assert_array_equal, requires_cftime, requires_dask from .test_coding_times import _all_cftime_date_types @@ -39,17 +39,12 @@ def test_safe_cast_to_index(): assert expected.dtype == actual.dtype -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_safe_cast_to_index_cftimeindex(): date_types = _all_cftime_date_types() for date_type in date_types.values(): dates = [date_type(1, 1, day) for day in range(1, 20)] - - if has_cftime: - expected = CFTimeIndex(dates) - else: - expected = pd.Index(dates) - + expected = CFTimeIndex(dates) actual = utils.safe_cast_to_index(np.array(dates)) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype @@ -57,7 +52,7 @@ def test_safe_cast_to_index_cftimeindex(): # Test that datetime.datetime objects are never used in a CFTimeIndex -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_safe_cast_to_index_datetime_datetime(): dates = [datetime(1, 1, day) for day in range(1, 20)] @@ -275,3 +270,27 @@ def test_either_dict_or_kwargs(): with pytest.raises(ValueError, match=r"foo"): result = either_dict_or_kwargs(dict(a=1), dict(a=1), "foo") + + +@pytest.mark.parametrize( + ["supplied", "all_", "expected"], + [ + (list("abc"), list("abc"), list("abc")), + (["a", ..., "c"], list("abc"), list("abc")), + (["a", ...], list("abc"), list("abc")), + (["c", ...], list("abc"), list("cab")), + ([..., "b"], list("abc"), list("acb")), + ([...], list("abc"), list("abc")), + ], +) +def test_infix_dims(supplied, all_, expected): + result = list(utils.infix_dims(supplied, all_)) + assert result == expected + + +@pytest.mark.parametrize( + ["supplied", "all_"], [([..., ...], list("abc")), ([...], list("aac"))] +) +def test_infix_dims_errors(supplied, all_): + with pytest.raises(ValueError): + list(utils.infix_dims(supplied, all_)) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 78723eda013..528027ed149 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1280,6 +1280,9 @@ def test_transpose(self): w2 = Variable(["d", "b", "c", "a"], np.einsum("abcd->dbca", x)) assert w2.shape == (5, 3, 4, 2) assert_identical(w2, w.transpose("d", "b", "c", "a")) + assert_identical(w2, w.transpose("d", ..., "a")) + assert_identical(w2, w.transpose("d", "b", "c", ...)) + assert_identical(w2, w.transpose(..., "b", "c", "a")) assert_identical(w, w2.transpose("a", "b", "c", "d")) w3 = Variable(["b", "c", "d", "a"], np.einsum("abcd->bcda", x)) assert_identical(w, w3.transpose("a", "b", "c", "d"))
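
Taken together, the infix_dims helper and the transpose/dot changes above give an ellipsis (...) the meaning "all remaining dimensions" throughout. A short usage sketch, assuming this patch is applied; the dataset here is illustrative:

import numpy as np
import xarray as xr

ds = xr.Dataset({"foo": (("x", "y", "z"), np.ones((2, 3, 4)))})
ds.transpose(..., "x")        # equivalent to ds.transpose("y", "z", "x")
ds.foo.dot(ds.foo, dims=...)  # contracts over every dimension, returning a scalar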