Skip to content

Commit b11a265

Browse files
tjhunterTim Hunter
andauthored
[110] Delayed initialization of the default store (#113)
* merge * issue * bug fix * changes * changes to doc Co-authored-by: Tim Hunter <tjhunter@cs.stanford.edu>
1 parent 83878e5 commit b11a265

File tree

15 files changed

+93
-51
lines changed

15 files changed

+93
-51
lines changed

dds/_api.py

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import pathlib
88
import time
9+
import tempfile
910
from collections import OrderedDict
1011
from typing import TypeVar, Tuple, Callable, Dict, Any, Optional, Union, Set, List
1112

@@ -36,7 +37,7 @@
3637

3738

3839
# TODO: set up the default store in the user's temporary space
39-
_store: Store = LocalFileStore("/tmp/dds/internal/", "/tmp/dds/data/")
40+
_store_var: Optional[Store] = None
4041
_eval_ctx: Optional[EvalContext] = None
4142

4243

@@ -64,11 +65,11 @@ def eval(
6465

6566
def load(path: Union[str, DDSPath, pathlib.Path]) -> Any:
6667
path_ = DDSPathUtils.create(path)
67-
key = _store.fetch_paths([path_]).get(path_)
68+
key = _store().fetch_paths([path_]).get(path_)
6869
if key is None:
69-
raise DDSException(f"The store {_store} did not return path {path_}")
70+
raise DDSException(f"The store {_store()} did not return path {path_}")
7071
else:
71-
return _store.fetch_blob(key)
72+
return _store().fetch_blob(key)
7273

7374

7475
def set_store(
@@ -84,21 +85,23 @@ def set_store(
8485
8586
store: either a store, or 'local' or 'dbfs'
8687
"""
87-
global _store
88+
global _store_var
8889
if isinstance(store, Store):
8990
if cache_objects is not None:
9091
raise DDSException(
9192
f"Cannot provide a caching option and a store object of type 'Store' at the same time"
9293
)
9394
# Directly setting the store
94-
_store = store
95+
_store_var = store
9596
return
9697
elif store == "local":
9798
if not internal_dir:
98-
internal_dir = "/tmp"
99+
internal_dir = str(
100+
pathlib.Path(tempfile.gettempdir()).joinpath("dds", "store")
101+
)
99102
if not data_dir:
100-
data_dir = "/tmp/data"
101-
_store = LocalFileStore(internal_dir, data_dir)
103+
data_dir = str(pathlib.Path(tempfile.gettempdir()).joinpath("dds", "data"))
104+
_store_var = LocalFileStore(internal_dir, data_dir)
102105
elif store == "dbfs":
103106
if data_dir is None:
104107
raise DDSException("Missing data_dir argument")
@@ -115,7 +118,7 @@ def set_store(
115118
commit_type = str(commit_type or CommitType.FULL.name).upper()
116119
commit_type_ = CommitType[commit_type]
117120

118-
_store = DBFSStore(
121+
_store_var = DBFSStore(
119122
DBFSURI.parse(internal_dir), DBFSURI.parse(data_dir), dbutils, commit_type_
120123
)
121124
else:
@@ -136,8 +139,8 @@ def set_store(
136139
elif cache_objects > 0:
137140
num_objects = cache_objects
138141
if num_objects is not None:
139-
_store = LRUCacheStore(_store, num_elem=num_objects)
140-
_logger.debug(f"Setting the store to {_store}")
142+
_store_var = LRUCacheStore(_store(), num_elem=num_objects)
143+
_logger.debug(f"Setting the store to {_store()}")
141144

142145

143146
def _parse_stages(
@@ -196,9 +199,9 @@ def _eval(
196199
)
197200
key = None if path is None else _eval_ctx.requested_paths[path]
198201
t = _time()
199-
if key is not None and _store.has_blob(key):
202+
if key is not None and _store().has_blob(key):
200203
_logger.debug(f"_eval:Return cached {path} from {key}")
201-
blob = _store.fetch_blob(key)
204+
blob = _store().fetch_blob(key)
202205
_add_delta(t, ProcessingStage.STORE_COMMIT)
203206
return blob
204207
else:
@@ -217,11 +220,27 @@ def _eval(
217220
if key is not None:
218221
_logger.info(f"_eval:Storing blob into key {key}")
219222
t = _time()
220-
_store.store_blob(key, res, codec=None)
223+
_store().store_blob(key, res, codec=None)
221224
_add_delta(t, ProcessingStage.STORE_COMMIT)
222225
return res
223226

224227

228+
def _store() -> Store:
229+
"""
230+
Gets the current store (or initializes it to the local default store if necessary)
231+
"""
232+
global _store_var
233+
if _store_var is None:
234+
p = pathlib.Path(tempfile.gettempdir()).joinpath("dds")
235+
store_path = p.joinpath("store")
236+
data_path = p.joinpath("data")
237+
_logger.info(
238+
f"Initializing default store. store dir: {store_path} data dir: {data_path}"
239+
)
240+
_store_var = LocalFileStore(str(store_path), str(data_path))
241+
return _store_var
242+
243+
225244
def _time() -> float:
226245
return time.monotonic()
227246

@@ -272,7 +291,7 @@ def _eval_new_ctx(
272291
_logger.debug(
273292
f"_eval_new_ctx: need to resolve indirect references: {loads_to_check}"
274293
)
275-
resolved_indirect_refs = _store.fetch_paths(loads_to_check)
294+
resolved_indirect_refs = _store().fetch_paths(loads_to_check)
276295
_logger.debug(
277296
f"_eval_new_ctx: fetched indirect references: {resolved_indirect_refs}"
278297
)
@@ -296,7 +315,7 @@ def _eval_new_ctx(
296315
present_blobs: Optional[Set[PyHash]]
297316
if extra_debug:
298317
present_blobs = set(
299-
[key for key in set(store_paths.values()) if _store.has_blob(key)]
318+
[key for key in set(store_paths.values()) if _store().has_blob(key)]
300319
)
301320
_logger.debug(f"_eval_new_ctx: {len(present_blobs)} present blobs")
302321
else:
@@ -327,9 +346,9 @@ def _eval_new_ctx(
327346
current_sig = inters.fun_return_sig
328347
_logger.debug(f"_eval_new_ctx:current_sig: {current_sig}")
329348
t = _time()
330-
if _store.has_blob(current_sig):
349+
if _store().has_blob(current_sig):
331350
_logger.debug(f"_eval_new_ctx:Return cached signature {current_sig}")
332-
res = _store.fetch_blob(current_sig)
351+
res = _store().fetch_blob(current_sig)
333352
_add_delta(t, ProcessingStage.STORE_COMMIT)
334353
else:
335354
arg_repr = [str(type(arg)) for arg in args]
@@ -349,13 +368,13 @@ def _eval_new_ctx(
349368
# TODO: add a phase for storing the blobs
350369
_logger.info(f"_eval:Storing blob into key {obj_key}")
351370
t = _time()
352-
_store.store_blob(obj_key, res, codec=None)
371+
_store().store_blob(obj_key, res, codec=None)
353372
_add_delta(t, ProcessingStage.STORE_COMMIT)
354373

355374
if ProcessingStage.PATH_COMMIT in stages:
356375
_logger.debug(f"Starting stage {ProcessingStage.PATH_COMMIT}")
357376
t = _time()
358-
_store.sync_paths(store_paths)
377+
_store().sync_paths(store_paths)
359378
_add_delta(t, ProcessingStage.PATH_COMMIT)
360379
_logger.debug(f"Stage {ProcessingStage.PATH_COMMIT} done")
361380
else:

dds/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version = "0.7.1"
1+
version = "0.7.2"

dds/store.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ def __init__(self, internal_dir: str, data_dir: str, create_dirs: bool = True):
8686
if not os.path.exists(p_blobs):
8787
os.makedirs(p_blobs)
8888

89+
def __repr__(self):
90+
return f"LocalFileStore(internal_dir={self._root} data_dir={self._data_root})"
91+
8992
def fetch_blob(self, key: PyHash) -> Any:
9093
p = os.path.join(self._root, "blobs", key)
9194
meta_p = os.path.join(self._root, "blobs", key + ".meta")

doc_source/changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Changelog
22

3+
## v0.7.2
4+
5+
Small usability fixes in this release:
6+
7+
* delays the creation of the default store (and all its side effects) to better support highly concurrent environments
8+
* fixes the type signatures of `dds.keep` and `dds.eval`
9+
* improves debugging messages (with a potential extra round trip to the store)
10+
311
## v0.7.0
412

513
Adds a major feature: caching in memory of most recently used objects. See the documentation of

doc_source/tut_custom_types.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@
196196
"metadata": {},
197197
"outputs": [],
198198
"source": [
199-
"dds._api._store.codec_registry().add_file_codec(PilFileCodec())"
199+
"dds._api._store().codec_registry().add_file_codec(PilFileCodec())"
200200
]
201201
},
202202
{

docs/changelog/changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Changelog
22

3+
## v0.7.2
4+
5+
Small usability fixes in this release:
6+
7+
* delays the creation of the default store (and all its side effects) to better support highly concurrent environments
8+
* fixes the type signatures of `dds.keep` and `dds.eval`
9+
* improves debugging messages (with a potential extra round trip to the store)
10+
311
## v0.7.0
412

513
Adds a major feature: caching in memory of most recently used objects. See the documentation of

docs/changelog/index.html

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
<ul class="current">
8888
<li class="toctree-l1 current"><a class="reference internal current" href="./">Changelog</a>
8989
<ul class="current">
90+
<li class="toctree-l2"><a class="reference internal" href="#v072">v0.7.2</a>
91+
</li>
9092
<li class="toctree-l2"><a class="reference internal" href="#v070">v0.7.0</a>
9193
</li>
9294
<li class="toctree-l2"><a class="reference internal" href="#v060">v0.6.0</a>
@@ -127,6 +129,13 @@
127129
<div class="section">
128130

129131
<h1 id="changelog">Changelog</h1>
132+
<h2 id="v072">v0.7.2</h2>
133+
<p>Small usability fixes in this release:</p>
134+
<ul>
135+
<li>delays the creation of the default store (and all its side effects) to better support highly concurrent environments</li>
136+
<li>fixes the type signatures of <code>dds.keep</code> and <code>dds.eval</code></li>
137+
<li>improves debugging messages (with a potential extra round trip to the store)</li>
138+
</ul>
130139
<h2 id="v070">v0.7.0</h2>
131140
<p>Adds a major feature: caching in memory of most recently used objects. See the documentation of
132141
<code>dds.set_store</code>.</p>

docs/dds-reference/index.html

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ <h2 class="doc doc-heading" id="dds.__init__.eval">
301301
</tr>
302302
<tr>
303303
<td><code>args</code></td>
304-
<td><code>Tuple[Any, ...]</code></td>
304+
<td><code>Any</code></td>
305305
<td>
306306
<p>the optional arguments for this function.</p>
307307
<p>NOTE: keyworded arguments are not supported yet.</p>
@@ -346,11 +346,11 @@ <h2 class="doc doc-heading" id="dds.__init__.eval">
346346
<div class="highlight">
347347
<pre><span></span><code><span class="k">def</span> <span class="nf">eval</span><span class="p">(</span>
348348
<span class="n">fun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">_Out</span><span class="p">],</span>
349-
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span>
349+
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
350350
<span class="n">dds_export_graph</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
351351
<span class="n">dds_extra_debug</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
352352
<span class="n">dds_stages</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ProcessingStage</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
353-
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
353+
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
354354
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">_Out</span><span class="p">]:</span>
355355
<span class="sd">"""</span>
356356
<span class="sd"> Evaluates a function. The result of the function is not stored in the data store, but the function itself may</span>
@@ -450,15 +450,15 @@ <h2 class="doc doc-heading" id="dds.__init__.keep">
450450
</tr>
451451
<tr>
452452
<td><code>args</code></td>
453-
<td><code>Tuple[Any, ...]</code></td>
453+
<td><code>Any</code></td>
454454
<td>
455455
<p>the arguments of this function</p>
456456
</td>
457457
<td><code>()</code></td>
458458
</tr>
459459
<tr>
460460
<td><code>kwargs</code></td>
461-
<td><code>Dict[str, Any]</code></td>
461+
<td><code>Any</code></td>
462462
<td>
463463
<p><em>(keyworded arguments are currently unsupported)</em></p>
464464
</td>
@@ -518,8 +518,8 @@ <h3 id="using-complex-arguments">Using complex arguments</h3>
518518
<pre><span></span><code><span class="k">def</span> <span class="nf">keep</span><span class="p">(</span>
519519
<span class="n">path</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">DDSPath</span><span class="p">,</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">],</span>
520520
<span class="n">fun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">_Out</span><span class="p">],</span>
521-
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span>
522-
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
521+
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
522+
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
523523
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">_Out</span><span class="p">:</span>
524524
<span class="sd">"""</span>
525525
<span class="sd"> Stores the result of calling a function to a specific path. If this particular evaluation has not happened before,</span>

docs/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,5 +240,5 @@ <h2 id="license">License</h2>
240240

241241
<!--
242242
MkDocs version : 1.1.2
243-
Build Date UTC : 2021-02-07 14:27:00.236152+00:00
243+
Build Date UTC : 2021-02-24 18:37:47.655064+00:00
244244
-->

docs/search/search_index.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)