1111)
1212
1313
14- def dataset_from_backend_dataset (
15- ds ,
14+ def _get_mtime (filename_or_obj ):
15+ # if passed an actual file path, augment the token with
16+ # the file modification time
17+ if isinstance (filename_or_obj , str ) and not is_remote_uri (filename_or_obj ):
18+ mtime = os .path .getmtime (filename_or_obj )
19+ else :
20+ mtime = None
21+ return mtime
22+
23+
24+ def _chunk_ds (
25+ backend_ds ,
1626 filename_or_obj ,
1727 engine ,
1828 chunks ,
19- cache ,
2029 overwrite_encoded_chunks ,
2130 ** extra_tokens ,
2231):
23- if not (isinstance (chunks , (int , dict )) or chunks is None ):
24- if chunks != "auto" :
25- raise ValueError (
26- "chunks must be an int, dict, 'auto', or None. "
27- "Instead found %s. " % chunks
28- )
29-
30- _protect_dataset_variables_inplace (ds , cache )
31- if chunks is not None and engine != "zarr" :
32+ if engine != "zarr" :
3233 from dask .base import tokenize
3334
34- # if passed an actual file path, augment the token with
35- # the file modification time
36- if isinstance (filename_or_obj , str ) and not is_remote_uri (filename_or_obj ):
37- mtime = os .path .getmtime (filename_or_obj )
38- else :
39- mtime = None
35+ mtime = _get_mtime (filename_or_obj )
4036 token = tokenize (filename_or_obj , mtime , engine , chunks , ** extra_tokens )
4137 name_prefix = "open_dataset-%s" % token
42- ds2 = ds .chunk (chunks , name_prefix = name_prefix , token = token )
38+ ds = backend_ds .chunk (chunks , name_prefix = name_prefix , token = token )
4339
44- elif engine == "zarr" :
40+ else :
4541
4642 if chunks == "auto" :
4743 try :
@@ -50,35 +46,64 @@ def dataset_from_backend_dataset(
5046 chunks = None
5147
5248 if chunks is None :
53- return ds
49+ return backend_ds
5450
5551 if isinstance (chunks , int ):
56- chunks = dict .fromkeys (ds .dims , chunks )
52+ chunks = dict .fromkeys (backend_ds .dims , chunks )
5753
5854 variables = {}
59- for k , v in ds .variables .items ():
55+ for k , v in backend_ds .variables .items ():
6056 var_chunks = _get_chunk (k , v , chunks )
6157 variables [k ] = _maybe_chunk (
6258 k ,
6359 v ,
6460 var_chunks ,
6561 overwrite_encoded_chunks = overwrite_encoded_chunks ,
6662 )
67- ds2 = ds ._replace (variables )
63+ ds = backend_ds ._replace (variables )
64+ return ds
65+
66+
67+ def _dataset_from_backend_dataset (
68+ backend_ds ,
69+ filename_or_obj ,
70+ engine ,
71+ chunks ,
72+ cache ,
73+ overwrite_encoded_chunks ,
74+ ** extra_tokens ,
75+ ):
76+ if not (isinstance (chunks , (int , dict )) or chunks is None ):
77+ if chunks != "auto" :
78+ raise ValueError (
79+ "chunks must be an int, dict, 'auto', or None. "
80+ "Instead found %s. " % chunks
81+ )
6882
83+ _protect_dataset_variables_inplace (backend_ds , cache )
84+ if chunks is None :
85+ ds = backend_ds
6986 else :
70- ds2 = ds
71- ds2 ._file_obj = ds ._file_obj
87+ ds = _chunk_ds (
88+ backend_ds ,
89+ filename_or_obj ,
90+ engine ,
91+ chunks ,
92+ overwrite_encoded_chunks ,
93+ ** extra_tokens ,
94+ )
95+
96+ ds ._file_obj = backend_ds ._file_obj
7297
7398 # Ensure source filename always stored in dataset object (GH issue #2550)
7499 if "source" not in ds .encoding :
75100 if isinstance (filename_or_obj , str ):
76- ds2 .encoding ["source" ] = filename_or_obj
101+ ds .encoding ["source" ] = filename_or_obj
77102
78- return ds2
103+ return ds
79104
80105
81- def resolve_decoders_kwargs (decode_cf , engine , ** decoders ):
106+ def _resolve_decoders_kwargs (decode_cf , engine , ** decoders ):
82107 signature = plugins .ENGINES [engine ]["signature" ]
83108 if decode_cf is False :
84109 for d in decoders :
@@ -225,7 +250,7 @@ def open_dataset(
225250 if engine is None :
226251 engine = _autodetect_engine (filename_or_obj )
227252
228- decoders = resolve_decoders_kwargs (
253+ decoders = _resolve_decoders_kwargs (
229254 decode_cf ,
230255 engine = engine ,
231256 mask_and_scale = mask_and_scale ,
@@ -249,7 +274,7 @@ def open_dataset(
249274 ** backend_kwargs ,
250275 ** {k : v for k , v in kwargs .items () if v is not None },
251276 )
252- ds = dataset_from_backend_dataset (
277+ ds = _dataset_from_backend_dataset (
253278 backend_ds ,
254279 filename_or_obj ,
255280 engine ,
0 commit comments