
Commit df17e1c

Merge remote-tracking branch 'upstream/master' into missing_value

* upstream/master:
  cfgrib is now part of conda-forge (pydata#2992)
  Add fill_value for concat and auto_combine (pydata#2964)
  Remove deprecated pytest.config usages (pydata#2988)
  Add transpose_coords option to DataArray.transpose (pydata#2556)
  Fix rolling.construct() example (pydata#2967)
  Implement load_dataset() and load_dataarray() (pydata#2917)

2 parents: 15d94f3 + ae1239c

21 files changed: 341 additions, 95 deletions

ci/requirements-py36.yml (1 addition, 2 deletions)

```diff
@@ -24,12 +24,11 @@ dependencies:
   - bottleneck
   - zarr
   - pseudonetcdf>=3.0.1
-  - eccodes
+  - cfgrib>=0.9.2
   - cdms2
   - pynio
   - iris>=1.10
   - pydap
   - lxml
   - pip:
-    - cfgrib>=0.9.2
     - mypy==0.660
```

ci/requirements-py37.yml (1 addition, 2 deletions)

```diff
@@ -25,9 +25,8 @@ dependencies:
   - bottleneck
   - zarr
   - pseudonetcdf>=3.0.1
+  - cfgrib>=0.9.2
   - lxml
-  - eccodes
   - pydap
   - pip:
-    - cfgrib>=0.9.2
     - mypy==0.650
```

conftest.py (20 additions, 0 deletions)

```diff
@@ -1,9 +1,29 @@
 """Configuration for pytest."""
 
+import pytest
+
 
 def pytest_addoption(parser):
     """Add command-line flags for pytest."""
     parser.addoption("--run-flaky", action="store_true",
                      help="runs flaky tests")
     parser.addoption("--run-network-tests", action="store_true",
                      help="runs tests requiring a network connection")
+
+
+def pytest_collection_modifyitems(config, items):
+    if not config.getoption("--run-flaky"):
+        skip_flaky = pytest.mark.skip(
+            reason="set --run-flaky option to run flaky tests")
+        for item in items:
+            if "flaky" in item.keywords:
+                item.add_marker(skip_flaky)
+
+    if not config.getoption("--run-network-tests"):
+        skip_network = pytest.mark.skip(
+            reason="set --run-network-tests option to run tests requiring an"
+                   " internet connection")
+        for item in items:
+            if "network" in item.keywords:
+                item.add_marker(skip_network)
```
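
For context, tests opt into these gates simply by carrying the corresponding mark; `pytest_collection_modifyitems` then skips them unless the matching flag is passed. A minimal sketch (the module and test names are hypothetical, not part of this commit):

```python
# test_example.py -- hypothetical tests illustrating the markers above.
import pytest


@pytest.mark.flaky
def test_occasionally_fails():
    # Skipped under plain `pytest`; runs with `pytest --run-flaky`.
    assert True


@pytest.mark.network
def test_requires_internet():
    # Skipped unless `pytest --run-network-tests` is given.
    assert True
```

On recent pytest versions, custom marks like these should also be registered (for example in `setup.cfg` or `pytest.ini`) to avoid unknown-mark warnings.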

doc/api.rst (2 additions, 0 deletions)

```diff
@@ -460,6 +460,7 @@ Dataset methods
    :toctree: generated/
 
    open_dataset
+   load_dataset
    open_mfdataset
    open_rasterio
    open_zarr
@@ -487,6 +488,7 @@ DataArray methods
    :toctree: generated/
 
    open_dataarray
+   load_dataarray
    DataArray.to_dataset
    DataArray.to_netcdf
    DataArray.to_pandas
```

doc/whats-new.rst (18 additions, 3 deletions)

```diff
@@ -27,8 +27,21 @@ Enhancements
 - Character arrays' character dimension name decoding and encoding handled by
   ``var.encoding['char_dim_name']`` (:issue:`2895`)
   By `James McCreight <https://github.com/jmccreight>`_.
+- :py:meth:`DataArray.transpose` now accepts a keyword argument
+  ``transpose_coords`` which enables transposition of coordinates in the
+  same way as :py:meth:`Dataset.transpose`. :py:meth:`DataArray.groupby`,
+  :py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now
+  accept a keyword argument ``restore_coord_dims`` which keeps the order
+  of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
+  By `Peter Hausamann <http://github.com/phausamann>`_.
 - Clean up Python 2 compatibility in code (:issue:`2950`)
   By `Guido Imperiale <https://github.com/crusaderky>`_.
+- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
+  ``open_dataset()`` and ``open_dataarray()`` to open, load into memory,
+  and close files, returning the Dataset or DataArray. These functions are
+  helpful for avoiding file-lock errors when trying to write to files opened
+  using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`)
+  By `Dan Nowacki <https://github.com/dnowacki-usgs>`_.
 
 Bug fixes
 ~~~~~~~~~
@@ -43,6 +56,8 @@ Bug fixes
   By `Martin Pletcher <https://github.com/pletchm>`_.
 - Increased support for `missing_value` (:issue:`2871`)
   By `Deepak Cherian <https://github.com/dcherian>`_.
+- Removed usages of `pytest.config`, which is deprecated (:issue:`2988`)
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 .. _whats-new.0.12.1:
 
@@ -155,9 +170,9 @@ Other enhancements
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
   By `Kevin Squire <https://github.com/kmsquire>`_.
-- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
-  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
-  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
+- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
+  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
+  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
   added to remove the original zarr chunk encoding.
   By `Lily Wang <https://github.com/lilyminium>`_.
```
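
To illustrate the ``transpose_coords`` entry above, a minimal sketch (the array and coordinate names here are made up for demonstration, not taken from the commit):

```python
import numpy as np
import xarray as xr

# A 2-D array carrying a 2-D (multi-dimensional) coordinate.
da = xr.DataArray(np.zeros((2, 3)), dims=('x', 'y'),
                  coords={'lon': (('x', 'y'), np.ones((2, 3)))})

# With transpose_coords=True the multi-dimensional coordinate 'lon'
# is transposed along with the data, matching Dataset.transpose.
transposed = da.transpose('y', 'x', transpose_coords=True)
print(transposed.coords['lon'].dims)  # ('y', 'x')
```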

xarray/__init__.py (1 addition, 1 deletion)

```diff
@@ -17,7 +17,7 @@
 from .core.options import set_options
 
 from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
-                           save_mfdataset)
+                           save_mfdataset, load_dataset, load_dataarray)
 from .backends.rasterio_ import open_rasterio
 from .backends.zarr import open_zarr
```

xarray/backends/api.py (55 additions, 2 deletions)

```diff
@@ -185,12 +185,64 @@ def _finalize_store(write, store):
     store.close()
 
 
+def load_dataset(filename_or_obj, **kwargs):
+    """Open, load into memory, and close a Dataset from a file or file-like
+    object.
+
+    This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs
+    from `open_dataset` in that it loads the Dataset into memory, closes the
+    file, and returns the Dataset. In contrast, `open_dataset` keeps the file
+    handle open and lazy loads its contents. All parameters are passed
+    directly to `open_dataset`. See that documentation for further details.
+
+    Returns
+    -------
+    dataset : Dataset
+        The newly created Dataset.
+
+    See Also
+    --------
+    open_dataset
+    """
+    if 'cache' in kwargs:
+        raise TypeError('cache has no effect in this context')
+
+    with open_dataset(filename_or_obj, **kwargs) as ds:
+        return ds.load()
+
+
+def load_dataarray(filename_or_obj, **kwargs):
+    """Open, load into memory, and close a DataArray from a file or
+    file-like object containing a single data variable.
+
+    This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It
+    differs from `open_dataarray` in that it loads the DataArray into
+    memory, closes the file, and returns the DataArray. In contrast,
+    `open_dataarray` keeps the file handle open and lazy loads its contents.
+    All parameters are passed directly to `open_dataarray`. See that
+    documentation for further details.
+
+    Returns
+    -------
+    dataarray : DataArray
+        The newly created DataArray.
+
+    See Also
+    --------
+    open_dataarray
+    """
+    if 'cache' in kwargs:
+        raise TypeError('cache has no effect in this context')
+
+    with open_dataarray(filename_or_obj, **kwargs) as da:
+        return da.load()
+
+
 def open_dataset(filename_or_obj, group=None, decode_cf=True,
                  mask_and_scale=None, decode_times=True, autoclose=None,
                  concat_characters=True, decode_coords=True, engine=None,
                  chunks=None, lock=None, cache=None, drop_variables=None,
                  backend_kwargs=None, use_cftime=None):
-    """Load and decode a dataset from a file or file-like object.
+    """Open and decode a dataset from a file or file-like object.
 
     Parameters
     ----------
@@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
                    concat_characters=True, decode_coords=True, engine=None,
                    chunks=None, lock=None, cache=None, drop_variables=None,
                    backend_kwargs=None, use_cftime=None):
-    """Open an DataArray from a netCDF file containing a single data variable.
+    """Open a DataArray from a file or file-like object containing a single
+    data variable.
 
     This is designed to read netCDF files with only one data variable. If
     multiple variables are present then a ValueError is raised.
```
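
A usage sketch of the new function (the file path is hypothetical): because `load_dataset` reads everything into memory and closes the file immediately, the round-trip below avoids the file-lock errors that a lazily opened handle from `open_dataset` can cause when writing back to the same path.

```python
import xarray as xr

# Create a small file to round-trip (path chosen only for illustration).
xr.Dataset({'v': ('x', [1, 2, 3])}).to_netcdf('data.nc')

# Eager: load into memory and close the file handle right away,
# so overwriting the source file afterwards is safe.
ds = xr.load_dataset('data.nc')
ds.to_netcdf('data.nc')

# By contrast, xr.open_dataset('data.nc') keeps the handle open and
# lazily loads data; writing back to 'data.nc' may then fail.
```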

xarray/core/combine.py (35 additions, 20 deletions)

```diff
@@ -4,7 +4,7 @@
 
 import pandas as pd
 
-from . import utils
+from . import utils, dtypes
 from .alignment import align
 from .merge import merge
 from .variable import IndexVariable, Variable, as_variable
@@ -14,7 +14,7 @@
 
 def concat(objs, dim=None, data_vars='all', coords='different',
            compat='equals', positions=None, indexers=None, mode=None,
-           concat_over=None):
+           concat_over=None, fill_value=dtypes.NA):
     """Concatenate xarray objects along a new or existing dimension.
 
     Parameters
@@ -66,6 +66,8 @@ def concat(objs, dim=None, data_vars='all', coords='different',
         List of integer arrays which specifies the integer positions to which
         to assign each dataset along the concatenated dimension. If not
         supplied, objects are concatenated in the provided order.
+    fill_value : scalar, optional
+        Value to use for newly missing values
     indexers, mode, concat_over : deprecated
 
     Returns
@@ -117,7 +119,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
     else:
         raise TypeError('can only concatenate xarray Dataset and DataArray '
                         'objects, got %s' % type(first_obj))
-    return f(objs, dim, data_vars, coords, compat, positions)
+    return f(objs, dim, data_vars, coords, compat, positions, fill_value)
 
 
 def _calc_concat_dim_coord(dim):
@@ -212,7 +214,8 @@ def process_subset_opt(opt, subset):
     return concat_over, equals
 
 
-def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
+def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
+                    fill_value=dtypes.NA):
     """
     Concatenate a sequence of datasets along a new or existing dimension
     """
@@ -225,7 +228,8 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
     dim, coord = _calc_concat_dim_coord(dim)
     # Make sure we're working on a copy (we'll be loading variables)
     datasets = [ds.copy() for ds in datasets]
-    datasets = align(*datasets, join='outer', copy=False, exclude=[dim])
+    datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
+                     fill_value=fill_value)
 
     concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)
 
@@ -317,7 +321,7 @@ def ensure_common_dims(vars):
 
 
 def _dataarray_concat(arrays, dim, data_vars, coords, compat,
-                      positions):
+                      positions, fill_value=dtypes.NA):
     arrays = list(arrays)
 
     if data_vars != 'all':
@@ -336,14 +340,15 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
         datasets.append(arr._to_temp_dataset())
 
     ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
-                         positions)
+                         positions, fill_value)
     result = arrays[0]._from_temp_dataset(ds, name)
 
     result.name = result_name(arrays)
     return result
 
 
-def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
+def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
+                 fill_value=dtypes.NA):
     if len(datasets) == 1 and dim is None:
         # There is nothing more to combine, so kick out early.
         return datasets[0]
@@ -366,7 +371,8 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
                              'supply the ``concat_dim`` argument '
                              'explicitly')
         dim, = concat_dims
-        return concat(datasets, dim=dim, data_vars=data_vars, coords=coords)
+        return concat(datasets, dim=dim, data_vars=data_vars,
+                      coords=coords, fill_value=fill_value)
 
 
 _CONCAT_DIM_DEFAULT = utils.ReprObject('<inferred>')
@@ -442,7 +448,8 @@ def _check_shape_tile_ids(combined_tile_ids):
 
 
 def _combine_nd(combined_ids, concat_dims, data_vars='all',
-                coords='different', compat='no_conflicts'):
+                coords='different', compat='no_conflicts',
+                fill_value=dtypes.NA):
     """
     Concatenates and merges an N-dimensional structure of datasets.
 
@@ -472,13 +479,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
             dim=concat_dim,
             data_vars=data_vars,
             coords=coords,
-            compat=compat)
+            compat=compat,
+            fill_value=fill_value)
     combined_ds = list(combined_ids.values())[0]
     return combined_ds
 
 
 def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars,
-                                      coords, compat):
+                                      coords, compat, fill_value=dtypes.NA):
     # Group into lines of datasets which must be combined along dim
     # need to sort by _new_tile_id first for groupby to work
     # TODO remove all these sorted OrderedDicts once python >= 3.6 only
@@ -490,7 +498,8 @@ def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars,
         combined_ids = OrderedDict(sorted(group))
         datasets = combined_ids.values()
         new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat,
-                                                    data_vars, coords)
+                                                    data_vars, coords,
+                                                    fill_value)
     return new_combined_ids
 
 
@@ -500,18 +509,20 @@ def vars_as_keys(ds):
 
 
 def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
                      compat='no_conflicts',
-                     data_vars='all', coords='different'):
+                     data_vars='all', coords='different',
+                     fill_value=dtypes.NA):
     # This is just the old auto_combine function (which only worked along 1D)
     if concat_dim is not None:
         dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
         sorted_datasets = sorted(datasets, key=vars_as_keys)
         grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
         concatenated = [_auto_concat(list(ds_group), dim=dim,
-                                     data_vars=data_vars, coords=coords)
+                                     data_vars=data_vars, coords=coords,
+                                     fill_value=fill_value)
                         for id, ds_group in grouped_by_vars]
     else:
         concatenated = datasets
-    merged = merge(concatenated, compat=compat)
+    merged = merge(concatenated, compat=compat, fill_value=fill_value)
     return merged
 
 
@@ -521,7 +532,7 @@ def _new_tile_id(single_id_ds_pair):
 
 
 def _auto_combine(datasets, concat_dims, compat, data_vars, coords,
-                  infer_order_from_coords, ids):
+                  infer_order_from_coords, ids, fill_value=dtypes.NA):
     """
     Calls logic to decide concatenation order before concatenating.
     """
@@ -550,12 +561,14 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords,
 
     # Repeatedly concatenate then merge along each dimension
     combined = _combine_nd(combined_ids, concat_dims, compat=compat,
-                           data_vars=data_vars, coords=coords)
+                           data_vars=data_vars, coords=coords,
+                           fill_value=fill_value)
     return combined
 
 
 def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
-                 compat='no_conflicts', data_vars='all', coords='different'):
+                 compat='no_conflicts', data_vars='all', coords='different',
+                 fill_value=dtypes.NA):
     """Attempt to auto-magically combine the given datasets into one.
     This method attempts to combine a list of datasets into a single entity by
     inspecting metadata and using a combination of concat and merge.
@@ -596,6 +609,8 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
         Details are in the documentation of concat
     coords : {'minimal', 'different', 'all' or list of str}, optional
         Details are in the documentation of concat
+    fill_value : scalar, optional
+        Value to use for newly missing values
 
     Returns
     -------
@@ -622,4 +637,4 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
     return _auto_combine(datasets, concat_dims=concat_dims, compat=compat,
                          data_vars=data_vars, coords=coords,
                          infer_order_from_coords=infer_order_from_coords,
-                         ids=False)
+                         ids=False, fill_value=fill_value)
```
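
A short sketch of the new ``fill_value`` keyword in action (the variable names and values are made up; the printed output shows the expected result, not captured program output). When inputs are outer-joined along their non-concatenated dimensions, the gaps are filled with ``fill_value`` instead of the default NaN (``dtypes.NA``):

```python
import xarray as xr

# Two arrays with partially overlapping 'x' coordinates.
a = xr.DataArray([1, 2], coords=[('x', [0, 1])], name='v')
b = xr.DataArray([3, 4], coords=[('x', [1, 2])], name='v')

# The outer join along 'x' creates missing entries; fill them with 0.
combined = xr.concat([a, b], dim='t', fill_value=0)
print(combined.values)
# [[1 2 0]
#  [0 3 4]]
```

``auto_combine`` forwards the same keyword through `_auto_combine_1d`, `_auto_concat`, and `merge`, so the fill applies at every concatenate-then-merge step.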
