87 changes: 56 additions & 31 deletions xarray/backends/apiv2.py
@@ -11,37 +11,33 @@
 )
 
 
-def dataset_from_backend_dataset(
-    ds,
+def _get_mtime(filename_or_obj):
+    # if passed an actual file path, augment the token with
+    # the file modification time
+    if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj):
+        mtime = os.path.getmtime(filename_or_obj)
+    else:
+        mtime = None
+    return mtime
+
+
+def _chunk_ds(
+    backend_ds,
     filename_or_obj,
     engine,
     chunks,
-    cache,
     overwrite_encoded_chunks,
     **extra_tokens,
 ):
-    if not (isinstance(chunks, (int, dict)) or chunks is None):
-        if chunks != "auto":
-            raise ValueError(
-                "chunks must be an int, dict, 'auto', or None. "
-                "Instead found %s. " % chunks
-            )
-
-    _protect_dataset_variables_inplace(ds, cache)
-    if chunks is not None and engine != "zarr":
+    if engine != "zarr":
         from dask.base import tokenize
 
-        # if passed an actual file path, augment the token with
-        # the file modification time
-        if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj):
-            mtime = os.path.getmtime(filename_or_obj)
-        else:
-            mtime = None
+        mtime = _get_mtime(filename_or_obj)
         token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
         name_prefix = "open_dataset-%s" % token
-        ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
+        ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token)
 
-    elif engine == "zarr":
+    else:
 
         if chunks == "auto":
            try:
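The new _get_mtime helper preserves the old inline behaviour: for a local path, the file's modification time is folded into the dask token, so reopening a file that has changed on disk yields fresh graph keys instead of reusing stale cached chunks. Below is a minimal sketch of that effect, not part of the PR, using a hypothetical local file data.nc and a simplified stand-in for _get_mtime (no remote-URI check):

import os

from dask.base import tokenize


def get_mtime(path):
    # Simplified stand-in for _get_mtime: local files only, no is_remote_uri check.
    return os.path.getmtime(path)


# Mirror of the tokenize(...) call in _chunk_ds: the path, its mtime, the engine
# and the chunking all feed the token; **extra_tokens would add decoder options.
token_before = tokenize("data.nc", get_mtime("data.nc"), "netcdf4", {"time": 10})

# ... suppose data.nc is rewritten here, changing its mtime ...

token_after = tokenize("data.nc", get_mtime("data.nc"), "netcdf4", {"time": 10})

# Different mtime -> different token -> different "open_dataset-<token>" key names.
print(token_before != token_after)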
@@ -50,35 +46,64 @@ def dataset_from_backend_dataset(
                 chunks = None
 
         if chunks is None:
-            return ds
+            return backend_ds
 
         if isinstance(chunks, int):
-            chunks = dict.fromkeys(ds.dims, chunks)
+            chunks = dict.fromkeys(backend_ds.dims, chunks)
 
         variables = {}
-        for k, v in ds.variables.items():
+        for k, v in backend_ds.variables.items():
             var_chunks = _get_chunk(k, v, chunks)
             variables[k] = _maybe_chunk(
                 k,
                 v,
                 var_chunks,
                 overwrite_encoded_chunks=overwrite_encoded_chunks,
             )
-        ds2 = ds._replace(variables)
+        ds = backend_ds._replace(variables)
+    return ds
+
+
+def _dataset_from_backend_dataset(
+    backend_ds,
+    filename_or_obj,
+    engine,
+    chunks,
+    cache,
+    overwrite_encoded_chunks,
+    **extra_tokens,
+):
+    if not (isinstance(chunks, (int, dict)) or chunks is None):
+        if chunks != "auto":
+            raise ValueError(
+                "chunks must be an int, dict, 'auto', or None. "
+                "Instead found %s. " % chunks
+            )
+
+    _protect_dataset_variables_inplace(backend_ds, cache)
+    if chunks is None:
+        ds = backend_ds
     else:
-        ds2 = ds
-        ds2._file_obj = ds._file_obj
+        ds = _chunk_ds(
+            backend_ds,
+            filename_or_obj,
+            engine,
+            chunks,
+            overwrite_encoded_chunks,
+            **extra_tokens,
+        )
 
+    ds._file_obj = backend_ds._file_obj
+
     # Ensure source filename always stored in dataset object (GH issue #2550)
     if "source" not in ds.encoding:
         if isinstance(filename_or_obj, str):
-            ds2.encoding["source"] = filename_or_obj
+            ds.encoding["source"] = filename_or_obj
 
-    return ds2
+    return ds
 
 
-def resolve_decoders_kwargs(decode_cf, engine, **decoders):
+def _resolve_decoders_kwargs(decode_cf, engine, **decoders):
     signature = plugins.ENGINES[engine]["signature"]
     if decode_cf is False:
         for d in decoders:
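With this split, _dataset_from_backend_dataset owns the chunks validation and the cache protection, short-circuits on chunks=None, and delegates the actual chunking to _chunk_ds; the open_dataset hunks below only swap in the new private names. A hedged usage sketch, assuming the experimental apiv2 entry point and a hypothetical data.nc readable by the netcdf4 engine:

from xarray.backends.apiv2 import open_dataset

# chunks=None: the backend dataset is returned as-is; only _file_obj and the
# "source" encoding are attached by _dataset_from_backend_dataset.
ds_eager = open_dataset("data.nc", engine="netcdf4", chunks=None)

# An int, dict, or "auto" passes validation and is forwarded to _chunk_ds,
# which builds dask-backed variables; any other value raises ValueError.
ds_lazy = open_dataset("data.nc", engine="netcdf4", chunks={"time": 100})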
@@ -225,7 +250,7 @@ def open_dataset(
     if engine is None:
         engine = _autodetect_engine(filename_or_obj)
 
-    decoders = resolve_decoders_kwargs(
+    decoders = _resolve_decoders_kwargs(
         decode_cf,
         engine=engine,
         mask_and_scale=mask_and_scale,
@@ -249,7 +274,7 @@ def open_dataset(
         **backend_kwargs,
         **{k: v for k, v in kwargs.items() if v is not None},
     )
-    ds = dataset_from_backend_dataset(
+    ds = _dataset_from_backend_dataset(
         backend_ds,
         filename_or_obj,
         engine,