Merge remote-tracking branch 'upstream/master' into groupby-repr

* upstream/master: Revisit # noqa annotations (pydata#3359) Fix codecov.io upload on Windows (pydata#3360) Add how do I ... section (pydata#3357) Add glossary to documentation (pydata#3352) Documentation improvements (pydata#3328) Remove `complex.nc` from built docs (pydata#3353) Fix DataArray.to_netcdf type annotation (pydata#3325)
dcherian · Oct 1, 2019 · d9f2a23 · d9f2a23
2 parents d8422c0 + 21705e6
commit d9f2a23
Show file tree

Hide file tree

Showing 37 changed files with 1,196 additions and 217 deletions.
diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
@@ -16,7 +16,7 @@ def decorator(func):
 
 def requires_dask():
     try:
-        import dask  # noqa
+        import dask  # noqa: F401
     except ImportError:
         raise NotImplementedError
 

diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py
@@ -5,7 +5,7 @@
 from . import randn, requires_dask
 
 try:
-    import dask  # noqa
+    import dask  # noqa: F401
 except ImportError:
     pass
 

diff --git a/ci/azure/unit-tests.yml b/ci/azure/unit-tests.yml
@@ -19,7 +19,8 @@ steps:
   displayName: Run tests
 
 - bash: |
-    bash <(curl https://codecov.io/bash) -t 688f4d53-31bb-49b5-8370-4ce6f792cf3d
+    curl https://codecov.io/bash > codecov.sh
+    bash codecov.sh -t 688f4d53-31bb-49b5-8370-4ce6f792cf3d
   displayName: Upload coverage to codecov.io
 
 # TODO: publish coverage results to Azure, once we can merge them across

diff --git a/doc/_static/style.css b/doc/_static/style.css
@@ -16,3 +16,12 @@
 .wy-nav-top {
   background-color: #555;
 }
+
+table.colwidths-given {
+    table-layout: fixed;
+    width: 100%;
+}
+table.docutils td {
+    white-space: unset;
+    word-wrap: break-word;
+}
diff --git a/doc/examples/_code/weather_data_setup.py b/doc/examples/_code/weather_data_setup.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-import seaborn as sns  # noqa, pandas aware plotting library
+import seaborn as sns
 
 import xarray as xr
 

diff --git a/doc/gallery/plot_cartopy_facetgrid.py b/doc/gallery/plot_cartopy_facetgrid.py
@@ -12,7 +12,7 @@
 For more details see `this discussion`_ on github.
 
 .. _this discussion: https://github.com/pydata/xarray/issues/1397#issuecomment-299190567
-"""  # noqa
+"""
 
 
 import cartopy.crs as ccrs

diff --git a/doc/howdoi.rst b/doc/howdoi.rst
@@ -0,0 +1,59 @@
+.. currentmodule:: xarray
+
+.. _howdoi:
+
+How do I ...
+============
+
+.. list-table::
+   :header-rows: 1
+   :widths: 40 60
+
+   * - How do I...
+     - Solution
+   * - add variables from other datasets to my dataset
+     - :py:meth:`Dataset.merge`
+   * - add a new dimension and/or coordinate
+     - :py:meth:`DataArray.expand_dims`, :py:meth:`Dataset.expand_dims`
+   * - add a new coordinate variable
+     - :py:meth:`DataArray.assign_coords`
+   * - change a data variable to a coordinate variable
+     - :py:meth:`Dataset.set_coords`
+   * - change the order of dimensions
+     - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose`
+   * - remove a variable from my object
+     - :py:meth:`Dataset.drop`, :py:meth:`DataArray.drop`
+   * - remove dimensions of length 1 or 0
+     - :py:meth:`DataArray.squeeze`, :py:meth:`Dataset.squeeze`
+   * - remove all variables with a particular dimension
+     - :py:meth:`Dataset.drop_dims`
+   * - convert non-dimension coordinates to data variables or remove them
+     - :py:meth:`DataArray.reset_coords`, :py:meth:`Dataset.reset_coords`
+   * - rename a variable, dimension or coordinate
+     - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename`, :py:meth:`Dataset.rename_vars`, :py:meth:`Dataset.rename_dims`
+   * - convert a DataArray to Dataset or vice versa
+     - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_array`
+   * - extract the underlying array (e.g. numpy or Dask arrays)
+     - :py:attr:`DataArray.data`
+   * - convert to and extract the underlying numpy array
+     - :py:attr:`DataArray.values`
+   * - find out if my xarray object is wrapping a Dask Array
+     - :py:func:`dask.is_dask_collection`
+   * - know how much memory my object requires
+     - :py:attr:`DataArray.nbytes`, :py:attr:`Dataset.nbytes`
+   * - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries
+     - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more)
+   * - apply a function on all data variables in a Dataset
+     - :py:meth:`Dataset.apply`
+   * - write xarray objects with complex values to a netCDF file
+     - :py:meth:`Dataset.to_netcdf`, :py:meth:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True``
+   * - make xarray objects look like other xarray objects
+     - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interpolate_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interpolate_like`, :py:meth:`DataArray.broadcast_like`
+   * - replace NaNs with other values
+     - :py:meth:`Dataset.fillna`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.interpolate_na`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.interpolate_na`
+   * - extract the year, month, day or similar from a DataArray of time values
+     - ``obj.dt.month`` for example where ``obj`` is a :py:class:`~xarray.DataArray` containing ``datetime64`` or ``cftime`` values. See :ref:`dt_accessor` for more.
+   * - round off time values to a specified frequency
+     - ``obj.dt.ceil``, ``obj.dt.floor``, ``obj.dt.round``. See :ref:`dt_accessor` for more.
+   * - make a mask that is ``True`` where an object contains any of the values in an array
+     - :py:meth:`Dataset.isin`, :py:meth:`DataArray.isin`
diff --git a/doc/index.rst b/doc/index.rst
@@ -46,6 +46,7 @@ Documentation
 
 **User Guide**
 
+* :doc:`terminology`
 * :doc:`data-structures`
 * :doc:`indexing`
 * :doc:`interpolation`
@@ -65,6 +66,7 @@ Documentation
    :hidden:
    :caption: User Guide
 
+   terminology
    data-structures
    indexing
    interpolation
@@ -82,6 +84,7 @@ Documentation
 **Help & reference**
 
 * :doc:`whats-new`
+* :doc:`howdoi`
 * :doc:`api`
 * :doc:`internals`
 * :doc:`roadmap`
@@ -94,6 +97,7 @@ Documentation
    :caption: Help & reference
 
    whats-new
+   howdoi
    api
    internals
    roadmap

diff --git a/doc/io.rst b/doc/io.rst
@@ -516,6 +516,11 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set:
     # Reading it back
     xr.open_dataarray("complex.nc", engine="h5netcdf")
 
+.. ipython:: python
+    :suppress:
+
+    import os
+    os.remove('complex.nc')
 
 .. warning::
 

diff --git a/doc/terminology.rst b/doc/terminology.rst
@@ -0,0 +1,42 @@
+.. _terminology:
+
+Terminology
+===========
+
+*Xarray terminology differs slightly from CF, mathematical conventions, and pandas; and therefore using xarray, understanding the documentation, and parsing error messages is easier once key terminology is defined. This glossary was designed so that more fundamental concepts come first. Thus for new users, this page is best read top-to-bottom. Throughout the glossary,* ``arr`` *will refer to an xarray* :py:class:`DataArray` *in any small examples. For more complete examples, please consult the relevant documentation.*
+
+----
+
+**DataArray:** A multi-dimensional array with labeled or named dimensions. ``DataArray`` objects add metadata such as dimension names, coordinates, and attributes (defined below) to underlying "unlabeled" data structures such as numpy and Dask arrays. If its optional ``name`` property is set, it is a *named DataArray*.
+
+----
+
+**Dataset:** A dict-like collection of ``DataArray`` objects with aligned dimensions. Thus, most operations that can be performed on the dimensions of a single ``DataArray`` can be performed on a dataset. Datasets have data variables (see **Variable** below), dimensions, coordinates, and attributes.
+
+----
+
+**Variable:** A `NetCDF-like variable <https://www.unidata.ucar.edu/software/netcdf/netcdf/Variables.html>`_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``.
+
+.. note::
+
+    The :py:class:`Variable` class is a low-level interface and can typically be ignored. However, the word "variable" appears often enough in the code and documentation that it is useful to understand.
+
+----
+
+**Dimension:** In mathematics, the *dimension* of data is loosely the number of degrees of freedom for it. A *dimension axis* is a set of all points in which all but one of these degrees of freedom is fixed. We can think of each dimension axis as having a name, for example the "x dimension".  In xarray, a ``DataArray`` object's *dimensions* are its named dimension axes, and the name of the ``i``-th dimension is ``arr.dims[i]``. If an array is created without dimensions, the default dimension names are ``dim_0``, ``dim_1``, and so forth.
+
+----
+
+**Coordinate:** An array that labels a dimension of another ``DataArray``. Loosely, the coordinate array's values can be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be assigned multiple coordinate arrays. However, only one coordinate array can be assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general.
+
+----
+
+**Dimension coordinate:** A coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``.
+
+----
+
+**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but *not* in ``arr.dims``. These coordinate arrays are useful for auxiliary labeling. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the assigned dimensions in parentheses. For example, ``coord_name   (dim_name) 1 2 3 ...``.
+
+----
+
+**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)``.
diff --git a/doc/time-series.rst b/doc/time-series.rst
@@ -101,6 +101,8 @@ You can also select a particular time by indexing with a
 
 For more details, read the pandas documentation.
 
+.. _dt_accessor:
+
 Datetime components
 -------------------
 

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -43,8 +43,20 @@ Bug fixes
 
 Documentation
 ~~~~~~~~~~~~~
+
+- Created a glossary of important xarray terms (:issue:`2410`, :pull:`3352`).
+  By `Gregory Gundersen <https://github.com/gwgundersen/>`_.
+- Created a "How do I..." section (:ref:`howdoi`) for solutions to common questions (:pull:`3357`).
+  By `Deepak Cherian <https://github.com/dcherian/>`_.
 - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`.
   By `Justus Magin <https://github.com/keewis>`_.
+- Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, 
+  :py:meth:`full_like`, :py:meth:`zeros_like`, :py:meth:`ones_like`, :py:meth:`Dataset.pipe`, 
+  :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna`.
+  By `Anderson Banihirwe <https://github.com/andersy005>`_.
+- Fixed documentation to clean up an unwanted file created in ``ipython`` example
+  (:pull:`3353`).
+  By `Gregory Gundersen <https://github.com/gwgundersen/>`_.
 
 .. _whats-new.0.13.0:
 

diff --git a/setup.py b/setup.py
@@ -86,7 +86,7 @@
 - Issue tracker: http://github.com/pydata/xarray/issues
 - Source code: http://github.com/pydata/xarray
 - SciPy2015 talk: https://www.youtube.com/watch?v=X0pAhJgySxk
-"""  # noqa
+"""
 
 
 setup(

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
@@ -42,12 +42,12 @@
 
 def _get_default_engine_remote_uri():
     try:
-        import netCDF4  # noqa
+        import netCDF4  # noqa: F401
 
         engine = "netcdf4"
     except ImportError:  # pragma: no cover
         try:
-            import pydap  # noqa
+            import pydap  # noqa: F401
 
             engine = "pydap"
         except ImportError:
@@ -61,13 +61,13 @@ def _get_default_engine_remote_uri():
 def _get_default_engine_grib():
     msgs = []
     try:
-        import Nio  # noqa
+        import Nio  # noqa: F401
 
         msgs += ["set engine='pynio' to access GRIB files with PyNIO"]
     except ImportError:  # pragma: no cover
         pass
     try:
-        import cfgrib  # noqa
+        import cfgrib  # noqa: F401
 
         msgs += ["set engine='cfgrib' to access GRIB files with cfgrib"]
     except ImportError:  # pragma: no cover
@@ -80,7 +80,7 @@ def _get_default_engine_grib():
 
 def _get_default_engine_gz():
     try:
-        import scipy  # noqa
+        import scipy  # noqa: F401
 
         engine = "scipy"
     except ImportError:  # pragma: no cover
@@ -90,12 +90,12 @@ def _get_default_engine_gz():
 
 def _get_default_engine_netcdf():
     try:
-        import netCDF4  # noqa
+        import netCDF4  # noqa: F401
 
         engine = "netcdf4"
     except ImportError:  # pragma: no cover
         try:
-            import scipy.io.netcdf  # noqa
+            import scipy.io.netcdf  # noqa: F401
 
             engine = "scipy"
         except ImportError:
@@ -722,44 +722,41 @@ def open_mfdataset(
 ):
     """Open multiple files as a single dataset.
 
-    If combine='by_coords' then the function ``combine_by_coords`` is used to 
-    combine the datasets into one before returning the result, and if 
-    combine='nested' then ``combine_nested`` is used. The filepaths must be 
-    structured according to which combining function is used, the details of 
-    which are given in the documentation for ``combine_by_coords`` and 
-    ``combine_nested``. By default the old (now deprecated) ``auto_combine`` 
-    will be used, please specify either ``combine='by_coords'`` or 
-    ``combine='nested'`` in future. Requires dask to be installed. See 
-    documentation for details on dask [1]. Attributes from the first dataset 
-    file are used for the combined dataset.
+    If combine='by_coords' then the function ``combine_by_coords`` is used to combine
+    the datasets into one before returning the result, and if combine='nested' then
+    ``combine_nested`` is used. The filepaths must be structured according to which
+    combining function is used, the details of which are given in the documentation for
+    ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
+    ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
+    ``combine='nested'`` in future. Requires dask to be installed. See documentation for
+    details on dask [1]. Attributes from the first dataset file are used for the
+    combined dataset.
 
     Parameters
     ----------
     paths : str or sequence
-        Either a string glob in the form "path/to/my/files/*.nc" or an explicit
-        list of files to open. Paths can be given as strings or as pathlib
-        Paths. If concatenation along more than one dimension is desired, then
-        ``paths`` must be a nested list-of-lists (see ``manual_combine`` for
-        details). (A string glob will be expanded to a 1-dimensional list.)
+        Either a string glob in the form "path/to/my/files/*.nc" or an explicit list of
+        files to open. Paths can be given as strings or as pathlib Paths. If
+        concatenation along more than one dimension is desired, then ``paths`` must be a
+        nested list-of-lists (see ``manual_combine`` for details). (A string glob will
+        be expanded to a 1-dimensional list.)
     chunks : int or dict, optional
-        Dictionary with keys given by dimension names and values given by chunk
-        sizes. In general, these should divide the dimensions of each dataset.
-        If int, chunk each dimension by ``chunks``.
-        By default, chunks will be chosen to load entire input files into
-        memory at once. This has a major impact on performance: please see the
-        full documentation for more details [2].
+        Dictionary with keys given by dimension names and values given by chunk sizes.
+        In general, these should divide the dimensions of each dataset. If int, chunk
+        each dimension by ``chunks``. By default, chunks will be chosen to load entire
+        input files into memory at once. This has a major impact on performance: please
+        see the full documentation for more details [2].
     concat_dim : str, or list of str, DataArray, Index or None, optional
-        Dimensions to concatenate files along.  You only
-        need to provide this argument if any of the dimensions along which you
-        want to concatenate is not a dimension in the original datasets, e.g.,
-        if you want to stack a collection of 2D arrays along a third dimension.
-        Set ``concat_dim=[..., None, ...]`` explicitly to
+        Dimensions to concatenate files along.  You only need to provide this argument
+        if any of the dimensions along which you want to concatenate is not a dimension
+        in the original datasets, e.g., if you want to stack a collection of 2D arrays
+        along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to
         disable concatenation along a particular dimension.
     combine : {'by_coords', 'nested'}, optional
-        Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is 
-        used to combine all the data. If this argument is not provided, 
-        `xarray.auto_combine` is used, but in the future this behavior will 
-        switch to use `xarray.combine_by_coords` by default.
+        Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to
+        combine all the data. If this argument is not provided, `xarray.auto_combine` is
+        used, but in the future this behavior will switch to use
+        `xarray.combine_by_coords` by default.
     compat : {'identical', 'equals', 'broadcast_equals',
               'no_conflicts', 'override'}, optional
         String indicating how to compare variables of the same name for
@@ -854,7 +851,7 @@ def open_mfdataset(
 
     .. [1] http://xarray.pydata.org/en/stable/dask.html
     .. [2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance
-    """  # noqa
+    """
     if isinstance(paths, str):
         if is_remote_uri(paths):
             raise ValueError(

diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py
@@ -21,9 +21,7 @@
 NETCDFC_LOCK = SerializableLock()
 
 
-_FILE_LOCKS = (
-    weakref.WeakValueDictionary()
-)  # type: MutableMapping[Any, threading.Lock]  # noqa
+_FILE_LOCKS = weakref.WeakValueDictionary()  # type: MutableMapping[Any, threading.Lock]
 
 
 def _get_threaded_lock(key):
-Original file line number
+Diff line change
@@ Expand Up / @@ -101,6 +101,8 @@ You can also select a particular time by indexing with a @@
     For more details, read the pandas documentation.
+    .. _dt_accessor:
     Datetime components
     -------------------
@@ Expand Down @@