Skip to content

Commit d45f49c

Browse files
authored
Merge pull request #88 from neuro-ml/dev
Support for key labels in cache to disk
2 parents f320e68 + 1e6e0b5 commit d45f49c

26 files changed

+80
-60
lines changed

connectome/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.6.0'
1+
__version__ = '0.6.1'

connectome/cache/disk.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
import logging
22

33
from tarn import PickleKeyStorage
4+
from tarn.interface import MaybeLabels
45

56
from .base import Cache
67

78
logger = logging.getLogger(__name__)
89

910

1011
class DiskCache(Cache):
11-
def __init__(self, pool: PickleKeyStorage):
12+
def __init__(self, pool: PickleKeyStorage, labels: MaybeLabels = None):
1213
super().__init__()
1314
self.cache = pool
15+
self.labels = labels
1416

1517
def prepare(self, param):
1618
raw = param.value
@@ -21,4 +23,4 @@ def get(self, key, context):
2123
return self.cache.read(context, error=False)
2224

2325
def set(self, key, value, context):
24-
self.cache.write(context, value, error=False)
26+
self.cache.write(context, value, error=False, labels=self.labels)

connectome/layers/cache.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import numpy as np
77
from tarn import DiskDict, HashKeyStorage, PickleKeyStorage
88
from tarn.config import StorageConfig, init_storage
9+
from tarn.interface import MaybeLabels
910

1011
from ..cache import Cache, DiskCache, MemoryCache
1112
from ..containers import EdgesBag, IdentityContext
@@ -114,16 +115,17 @@ class CacheToDisk(CacheToStorage):
114115
"""
115116

116117
def __init__(self, index: PathLikes, storage: HashKeyStorage, serializer: SerializersLike, names: StringsLike, *,
117-
impure: bool = False):
118+
impure: bool = False, labels: MaybeLabels = None):
118119
super().__init__(names=names, impure=impure)
119120
names, serializer = _normalize_disk_arguments(names, serializer)
120-
self.storage = DiskCache(PickleKeyStorage(index, storage, serializer))
121+
self.storage = DiskCache(PickleKeyStorage(index, storage, serializer), labels=labels)
121122

122123
def _get_storage(self) -> Cache:
123124
return self.storage
124125

125126
@classmethod
126-
def simple(cls, *names, root: PathLike, serializer: Union[Serializer, Sequence[Serializer]] = None):
127+
def simple(cls, *names, root: PathLike, serializer: Union[Serializer, Sequence[Serializer]] = None,
128+
labels: MaybeLabels = None):
127129
"""
128130
A simple version of caching to disk with adequate default settings.
129131
@@ -158,7 +160,7 @@ def simple(cls, *names, root: PathLike, serializer: Union[Serializer, Sequence[S
158160
PickleSerializer(),
159161
)
160162

161-
return cls(index, HashKeyStorage(DiskDict(storage)), serializer, names)
163+
return cls(index, HashKeyStorage(DiskDict(storage)), serializer, names, labels=labels)
162164

163165

164166
def _normalize_disk_arguments(names, serializer):

connectome/layers/columns.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any, Generator, Union
44

55
from tarn import HashKeyStorage, PickleKeyStorage
6+
from tarn.interface import MaybeLabels
67
from tqdm.auto import tqdm
78

89
from ..cache import DiskCache, MemoryCache
@@ -41,7 +42,7 @@ class CacheColumns(DynamicConnectLayer, CacheLayer):
4142
"""
4243

4344
def __init__(self, index: PathLikes, storage: HashKeyStorage, serializer: SerializersLike, names: StringsLike, *,
44-
verbose: bool = False, shard_size: Union[int, float, None] = None):
45+
verbose: bool = False, shard_size: Union[int, float, None] = None, labels: MaybeLabels = None):
4546
if shard_size == 1:
4647
raise ValueError(f'Shard size of 1 is ambiguous. Use None if you want to have a single shard')
4748
names, serializer = _normalize_disk_arguments(names, serializer)
@@ -50,7 +51,7 @@ def __init__(self, index: PathLikes, storage: HashKeyStorage, serializer: Serial
5051
self.names = names
5152
self.shard_size = shard_size
5253
self.verbose = verbose
53-
self.disk = DiskCache(PickleKeyStorage(index, storage, serializer))
54+
self.disk = DiskCache(PickleKeyStorage(index, storage, serializer), labels=labels)
5455
self.ram = MemoryCache(None)
5556

5657
def _prepare_container(self, previous: EdgesBag) -> EdgesBag:

connectome/layers/debug.py

+26-11
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
import hashlib
2-
from typing import Any, Generator
2+
from typing import Any, Generator, Type, Union
33

4-
from tarn.cache.storage import key_to_digest
4+
from tarn.compat import HashAlgorithm
5+
from tarn.pickler import dumps
56

67
from ..containers import EdgesBag
7-
from ..engine import Command, Details, Node, NodeHash, NodeHashes, Request, Response, StaticGraph, StaticHash
8+
from ..engine import (
9+
Command, Details, Node, NodeHash, NodeHashes, Request, Response, StaticGraph, StaticHash, CustomHash, LeafHash
10+
)
811
from ..utils import StringsLike
912
from .base import CallableLayer
1013
from .cache import to_seq
1114

1215

1316
class HashDigest(CallableLayer):
14-
def __init__(self, names: StringsLike, algorithm):
17+
def __init__(self, names: StringsLike, algorithm: Union[Type[HashAlgorithm], str, None] = None,
18+
return_value: bool = False):
1519
if isinstance(algorithm, str):
1620
algorithm = getattr(hashlib, algorithm)
1721

@@ -22,7 +26,7 @@ def __init__(self, names: StringsLike, algorithm):
2226
inp, out = Node(name, details), Node(name, details)
2327
inputs.append(inp)
2428
outputs.append(out)
25-
edges.append(HashDigestEdge(algorithm).bind(inp, out))
29+
edges.append(HashDigestEdge(algorithm, return_value).bind(inp, out))
2630

2731
super().__init__(EdgesBag(
2832
inputs, outputs, edges,
@@ -31,16 +35,27 @@ def __init__(self, names: StringsLike, algorithm):
3135

3236

3337
class HashDigestEdge(StaticGraph, StaticHash):
34-
def __init__(self, algorithm):
38+
def __init__(self, algorithm, return_value):
3539
super().__init__(arity=1)
3640
self.algorithm = algorithm
41+
self.return_value = return_value
3742

3843
def _make_hash(self, inputs: NodeHashes) -> NodeHash:
39-
return inputs[0]
44+
return CustomHash('connectome.HashDigest', LeafHash(self.algorithm), LeafHash(self.return_value), *inputs)
4045

4146
def evaluate(self) -> Generator[Request, Response, Any]:
42-
value = yield Command.ParentValue, 0
43-
output = yield Command.CurrentHash,
47+
result = []
48+
if self.return_value:
49+
value = yield Command.ParentValue, 0
50+
result.append(value)
4451

45-
pickled, digest = key_to_digest(self.algorithm, output.value)
46-
return value, output.value, digest, pickled
52+
node_hash = yield Command.CurrentHash,
53+
node_hash = node_hash.value
54+
result.append(node_hash)
55+
56+
pickled = dumps(node_hash)
57+
result.append(pickled)
58+
if self.algorithm is not None:
59+
result.append(self.algorithm(pickled).digest())
60+
61+
return tuple(result)

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
numpy
22
tqdm
33
pylru
4-
tarn>=0.1.0,<1.0.0
4+
tarn>=0.5.0,<1.0.0
55
jboc<1.0.0

tests/disk_fixtures.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
from typing import Iterator
66

77
import pytest
8-
98
from tarn import HashKeyStorage
10-
from tarn.config import init_storage, StorageConfig
9+
from tarn.config import StorageConfig, init_storage
1110

1211
from connectome import CacheToDisk
1312

tests/graph_fixtures.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from connectome.containers import ReversibleContainer
4-
from connectome.engine import Node, BoundEdge, FunctionEdge
4+
from connectome.engine import BoundEdge, FunctionEdge, Node
55
from connectome.utils import extract_signature
66

77

tests/interface_fixtures.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import re
2+
23
import pytest
34

4-
from connectome import Source, Transform, inverse, optional, positional, meta, Function
5+
from connectome import Function, Source, Transform, inverse, meta, optional, positional
56
from connectome.engine.compiler import identity
67

78

tests/test_cache/test_cache.py

+3-8
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,14 @@
66
from threading import Thread
77

88
import pytest
9+
from tarn.config import StorageConfig, init_storage
10+
from utils import Counter
911

10-
from tarn import DiskDict
11-
from tarn.config import init_storage, StorageConfig
12-
13-
from connectome import CacheToRam, Apply, CacheToDisk, CacheColumns, Transform, optional
12+
from connectome import Apply, CacheColumns, CacheToDisk, CacheToRam, Transform, optional
1413
from connectome.engine.edges import CacheEdge
1514
from connectome.interface.nodes import Silent
1615
from connectome.serializers import JsonSerializer
1716

18-
from utils import Counter
19-
2017

2118
def sleeper(s):
2219
def f(x):
@@ -142,8 +139,6 @@ def visit(storage, root):
142139
with tempfile.TemporaryDirectory() as temp, storage_factory(locker=locker) as temp_storage:
143140
temp = Path(temp) / 'cache'
144141
init_storage(StorageConfig(hash='blake2b', levels=[1, 63], locker=locker), temp)
145-
# TODO: remove after tarn is updated
146-
DiskDict(temp)
147142

148143
th = Process(target=visit, args=(temp_storage, temp))
149144
th.start()

tests/test_cache/test_disk_cache.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import os
22

33
import pytest
4+
from tarn.cache.pickler import LATEST_VERSION
45

5-
from connectome import Transform, CacheToDisk
6+
from connectome import CacheToDisk, Transform
67
from connectome.exceptions import StorageCorruption
78
from connectome.serializers import JsonSerializer
8-
from tarn.cache.pickler import LATEST_VERSION
99

1010

1111
def setup_cache(temp_dir):

tests/test_cache/test_hash.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
from pathlib import Path
22
from tempfile import TemporaryDirectory
33

4-
from tarn.config import init_storage, StorageConfig
4+
from tarn.config import StorageConfig, init_storage
55

6-
from connectome import Chain, CacheToRam, CacheToDisk, CacheColumns, HashDigest
6+
from connectome import CacheColumns, CacheToDisk, CacheToRam, Chain, HashDigest
77
from connectome.serializers import JsonSerializer
88

99

1010
def test_hash(block_maker, storage_factory):
11-
hash_layer = HashDigest(['image'], 'blake2b')
11+
hash_layer = HashDigest(['image'], 'blake2b', return_value=True)
1212
pipeline = Chain(
1313
block_maker.first_ds(first_constant=2, ids_arg=15),
1414
block_maker.crop(),

tests/test_cache/test_layer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from connectome import Transform, CacheToRam
1+
from connectome import CacheToRam, Transform
22

33

44
def test_nested_virtual():

tests/test_container.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44

55
from connectome import Transform
6-
from connectome.exceptions import GraphError, DependencyError
6+
from connectome.exceptions import DependencyError, GraphError
77

88

99
def test_normalization():

tests/test_interface/test_factory.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pytest
55

6-
from connectome import Transform, Function, Source, meta
6+
from connectome import Function, Source, Transform, meta
77
from connectome.exceptions import FieldError, GraphError
88

99

tests/test_interface/test_interface.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
import pytest
44

5-
from connectome import positional, Source, Chain, Transform, Merge, meta, Output
6-
from connectome.interface.metaclasses import TransformBase, SourceBase
5+
from connectome import Chain, Merge, Output, Source, Transform, meta, positional
6+
from connectome.interface.metaclasses import SourceBase, TransformBase
77

88

99
def test_single_with_params():

tests/test_interface/test_inverse.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from connectome import Transform, Apply, inverse
3+
from connectome import Apply, Transform, inverse
44
from connectome.engine.compiler import identity
55
from connectome.exceptions import FieldError
66

tests/test_interface/test_metaclasses.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from connectome import Source, Transform, Mixin
3+
from connectome import Mixin, Source, Transform
44

55

66
def test_subclasses():

tests/test_interface/test_mixins.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from connectome import Mixin, Source, meta, Transform, Output, inverse
3+
from connectome import Mixin, Output, Source, Transform, inverse, meta
44

55

66
class SizeMixin(Mixin):

tests/test_interface/test_prepared.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from connectome import Source, meta, impure, Transform, HashDigest, CacheToRam, Merge
3+
from connectome import CacheToRam, HashDigest, Merge, Source, Transform, impure, meta
44
from connectome.interface.complex_edges import hash_by_value
55

66

@@ -23,8 +23,8 @@ class Stripped(Source):
2323

2424

2525
def test_hash():
26-
one = Computable(length=4) >> HashDigest(['field'], 'blake2b')
27-
two = Stripped() >> HashDigest(['field'], 'blake2b')
26+
one = Computable(length=4) >> HashDigest(['field'], 'blake2b', return_value=True)
27+
two = Stripped() >> HashDigest(['field'], 'blake2b', return_value=True)
2828
assert one.field('12345678')[0] == 'received 1234'
2929
assert one.field('12345678') == one.field('1234----') == two.field('1234')
3030

@@ -35,7 +35,7 @@ class A(Transform):
3535
def x(a, b, c):
3636
return a + b + c
3737

38-
ds = A() >> HashDigest(['x'], 'blake2b')
38+
ds = A() >> HashDigest(['x'], 'blake2b', return_value=True)
3939
assert ds.x(1, 2, 3) == ds.x(3, 2, 1)
4040

4141

tests/test_interface/test_validation.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
2-
from connectome import Source, Filter
2+
3+
from connectome import Filter, Source
34

45

56
def test_default_args():

tests/test_layers/test_chain.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
import pytest
44
from tarn.config import StorageConfig, init_storage
5-
from connectome import Source, Transform, Chain, CacheToRam, meta, LazyChain, HashDigest, CacheColumns, Merge, optional
5+
6+
from connectome import CacheColumns, CacheToRam, Chain, HashDigest, LazyChain, Merge, Source, Transform, meta, optional
67
from connectome.exceptions import DependencyError, FieldError
78

89

@@ -33,7 +34,7 @@ def test_chain():
3334
def test_nested(block_maker):
3435
one = block_maker.first_ds(first_constant=2, ids_arg=15)
3536
two = block_maker.crop()
36-
hash_layer = HashDigest('image', 'blake2b')
37+
hash_layer = HashDigest('image', 'blake2b', return_value=True)
3738

3839
base, *variants = [
3940
Chain(one, two, hash_layer),

tests/test_layers/test_filter.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
2-
from connectome import Chain, Filter, Source, meta, impure
2+
3+
from connectome import Chain, Filter, Source, impure, meta
34
from connectome.engine.base import HashError
45
from connectome.exceptions import DependencyError
56
from connectome.interface.blocks import HashDigest
@@ -18,7 +19,7 @@ def test_filter(block_maker):
1819
ids = pipeline.ids
1920
assert ids == ('4', '14')
2021

21-
hash_layer = HashDigest(['image', 'lungs', 'spacing'], 'blake2b')
22+
hash_layer = HashDigest(['image', 'lungs', 'spacing'], 'blake2b', return_value=True)
2223
hashed = Chain(block, hash_layer)
2324
pipeline = Chain(
2425
block, Filter(lambda image: image.endswith('4')), hash_layer,

0 commit comments

Comments
 (0)