Commit 2899028
feat: re-implement FileTreeStore with serialization adapter pattern
Updates FileTreeStore to work with the new SerializationAdapter pattern introduced in main branch PR #184:

- Uses self._serialization_adapter.load_json() instead of ManagedEntry.from_json()
- Uses self._serialization_adapter.dump_json() instead of .to_json()
- Passes serialization_adapter parameter to BaseStore.__init__()
- Maintains all original functionality (path traversal protection, etc.)
- Includes comprehensive tests and documentation

Co-authored-by: William Easton <strawgate@users.noreply.github.com>
1 parent ac651f4 commit 2899028
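In sketch form, the change moves ManagedEntry (de)serialization behind the store's adapter on both the read and write paths. The two helper functions below are illustrative only (they are not part of the commit); the adapter methods and keyword arguments are the ones the new store code uses:

```python
from key_value.shared.utils.managed_entry import ManagedEntry
from key_value.shared.utils.serialization import BasicSerializationAdapter, SerializationAdapter


def dump_entry(adapter: SerializationAdapter, entry: ManagedEntry) -> str:
    # Old pattern: entry.to_json(); new pattern: the adapter produces the JSON string.
    return adapter.dump_json(entry=entry)


def load_entry(adapter: SerializationAdapter, json_str: str) -> ManagedEntry:
    # Old pattern: ManagedEntry.from_json(json_str); new pattern: the adapter parses it.
    return adapter.load_json(json_str=json_str)


# BasicSerializationAdapter is the default that FileTreeStore falls back to when no adapter is passed.
adapter = BasicSerializationAdapter()
```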

File tree

9 files changed: +1025, -0 lines changed

docs/api/stores.md

Lines changed: 10 additions & 0 deletions

@@ -23,6 +23,16 @@ Persistent disk-based key-value store using DiskCache.

      members:
        - __init__

## FileTree Store

Directory-based store for visual inspection and testing.

::: key_value.aio.stores.filetree.FileTreeStore
    options:
      show_source: false
      members:
        - __init__

## Redis Store

Redis-backed key-value store.

docs/stores.md

Lines changed: 69 additions & 0 deletions

@@ -34,6 +34,7 @@ Local stores are stored in memory or on disk, local to the application.

| Memory | N/A ||| Fast in-memory storage for development and caching |
| Disk | Stable | ☑️ || Persistent file-based storage in a single file |
| Disk (Per-Collection) | Stable | ☑️ || Persistent storage with separate files per collection |
| FileTree (test) | Unstable | ☑️ || Directory-based storage with JSON files for visual inspection |
| Null (test) | N/A ||| No-op store for testing without side effects |
| RocksDB | Unstable | ☑️ || High-performance embedded database |
| Simple (test) | N/A ||| Simple in-memory store for testing |

@@ -140,6 +141,74 @@

---

### FileTreeStore

Directory-based storage for visual inspection and debugging.

```python
from key_value.aio.stores.filetree import FileTreeStore

store = FileTreeStore(directory="./debug-store")
```

**Installation:**

```bash
pip install py-key-value-aio
```

**Use Cases:**

- Visual inspection of store contents
- Debugging store behavior
- Development and testing
- Understanding data structure

**Characteristics:**

- Collections as directories
- Keys as JSON files (`{key}.json`)
- Human-readable filesystem layout
- Easy to inspect and modify
- **NOT for production use**

**Directory Structure:**

```text
{base_directory}/
    {collection_1}/
        {key_1}.json
        {key_2}.json
    {collection_2}/
        {key_3}.json
```

**Important Limitations:**

- Poor performance with many keys
- No atomic operations
- No automatic cleanup of expired entries
- Filesystem path length constraints
- Subject to filesystem limitations

**When to Use:**

Use FileTreeStore when you need to:

- Visually inspect what's being stored
- Debug complex data structures
- Understand how the store organizes data
- Manually modify stored data for testing

**When NOT to Use:**

- Production environments
- High-performance requirements
- Large datasets
- Concurrent access scenarios

---

### RocksDBStore

High-performance embedded database using RocksDB.
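One point from the FileTreeStore section above that is easy to demonstrate: because the layout is just directories and JSON files, a store directory can be browsed with nothing but `pathlib`. A minimal sketch, assuming the `./debug-store` path from the example above and that the store has already written some entries:

```python
from pathlib import Path

# Each subdirectory of the base directory is a collection; each *.json file
# inside it is one key, holding the serialized ManagedEntry.
base = Path("./debug-store")
for collection_dir in sorted(p for p in base.iterdir() if p.is_dir()):
    print(collection_dir.name)
    for key_file in sorted(collection_dir.glob("*.json")):
        print(f"  {key_file.stem}: {key_file.read_text(encoding='utf-8')}")
```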
key_value/aio/stores/filetree/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@

```python
"""File-tree based store for visual inspection and testing."""

from key_value.aio.stores.filetree.store import FileTreeStore

__all__ = ["FileTreeStore"]
```
key_value/aio/stores/filetree/store.py

Lines changed: 292 additions & 0 deletions

@@ -0,0 +1,292 @@

```python
"""FileTreeStore implementation using native filesystem operations."""

from pathlib import Path

from key_value.shared.errors import DeserializationError
from key_value.shared.utils.managed_entry import ManagedEntry
from key_value.shared.utils.serialization import BasicSerializationAdapter, SerializationAdapter
from typing_extensions import override

from key_value.aio.stores.base import (
    BaseDestroyCollectionStore,
    BaseDestroyStore,
    BaseEnumerateCollectionsStore,
    BaseEnumerateKeysStore,
)

DEFAULT_PAGE_SIZE = 10000
PAGE_LIMIT = 10000


class FileTreeStore(BaseDestroyStore, BaseDestroyCollectionStore, BaseEnumerateCollectionsStore, BaseEnumerateKeysStore):
    """A file-tree based store using directories for collections and files for keys.

    This store uses the native filesystem:
    - Each collection is a subdirectory under the base directory
    - Each key is stored as a JSON file named "{key}.json"
    - File contents contain the ManagedEntry serialized to JSON

    Directory structure:
        {base_directory}/
            {collection_1}/
                {key_1}.json
                {key_2}.json
            {collection_2}/
                {key_3}.json

    Warning:
        This store is intended for development and testing purposes only.
        It is not suitable for production use due to:
        - Poor performance with many keys
        - No atomic operations
        - No built-in cleanup of expired entries
        - Filesystem limitations on file names and directory sizes

        The store does NOT automatically clean up expired entries from disk. Expired entries
        are only filtered out when read via get() or similar methods.
    """

    _directory: Path

    def __init__(
        self,
        *,
        directory: Path | str,
        serialization_adapter: SerializationAdapter | None = None,
        default_collection: str | None = None,
    ) -> None:
        """Initialize the file-tree store.

        Args:
            directory: The base directory to use for storing collections and keys.
            serialization_adapter: The serialization adapter to use for the store.
            default_collection: The default collection to use if no collection is provided.
        """
        self._directory = Path(directory).resolve()
        self._directory.mkdir(parents=True, exist_ok=True)

        self._stable_api = False

        super().__init__(
            serialization_adapter=serialization_adapter or BasicSerializationAdapter(),
            default_collection=default_collection,
        )

    def _get_collection_path(self, collection: str) -> Path:
        """Get the path to a collection directory.

        Args:
            collection: The collection name.

        Returns:
            The path to the collection directory.

        Raises:
            ValueError: If the collection name would result in a path outside the base directory.
        """
        collection_path = (self._directory / collection).resolve()

        if not collection_path.is_relative_to(self._directory):
            msg = f"Invalid collection name: {collection!r} would escape base directory"
            raise ValueError(msg)

        return collection_path

    def _get_key_path(self, collection: str, key: str) -> Path:
        """Get the path to a key file.

        Args:
            collection: The collection name.
            key: The key name.

        Returns:
            The path to the key file.

        Raises:
            ValueError: If the collection or key name would result in a path outside the base directory.
        """
        collection_path = self._get_collection_path(collection)
        key_path = (collection_path / f"{key}.json").resolve()

        if not key_path.is_relative_to(self._directory):
            msg = f"Invalid key name: {key!r} would escape base directory"
            raise ValueError(msg)

        return key_path

    @override
    async def _setup_collection(self, *, collection: str) -> None:
        """Set up a collection by creating its directory if it doesn't exist.

        Args:
            collection: The collection name.
        """
        collection_path = self._get_collection_path(collection)
        collection_path.mkdir(parents=True, exist_ok=True)

    @override
    async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None:
        """Retrieve a managed entry by key from the specified collection.

        Args:
            collection: The collection name.
            key: The key name.

        Returns:
            The managed entry if found and not expired, None otherwise.
        """
        key_path = self._get_key_path(collection, key)

        if not key_path.exists():
            return None

        try:
            json_str = key_path.read_text(encoding="utf-8")
            return self._serialization_adapter.load_json(json_str=json_str)
        except (OSError, DeserializationError):
            # If we can't read or parse the file, treat it as not found
            return None

    @override
    async def _put_managed_entry(
        self,
        *,
        key: str,
        collection: str,
        managed_entry: ManagedEntry,
    ) -> None:
        """Store a managed entry at the specified key in the collection.

        Args:
            collection: The collection name.
            key: The key name.
            managed_entry: The managed entry to store.
        """
        key_path = self._get_key_path(collection, key)

        # Ensure the parent directory exists
        key_path.parent.mkdir(parents=True, exist_ok=True)

        # Write the managed entry to the file
        json_str = self._serialization_adapter.dump_json(entry=managed_entry)
        key_path.write_text(json_str, encoding="utf-8")

    @override
    async def _delete_managed_entry(self, *, key: str, collection: str) -> bool:
        """Delete a managed entry from the specified collection.

        Args:
            collection: The collection name.
            key: The key name.

        Returns:
            True if the entry was deleted, False if it didn't exist.
        """
        key_path = self._get_key_path(collection, key)

        if not key_path.exists():
            return False

        try:
            key_path.unlink()
        except OSError:
            return False
        else:
            return True

    @override
    async def _get_collection_keys(self, *, collection: str, limit: int | None = None) -> list[str]:
        """List all keys in the specified collection.

        Args:
            collection: The collection name.
            limit: Maximum number of keys to return.

        Returns:
            A list of key names (without the .json extension).
        """
        limit = min(limit or DEFAULT_PAGE_SIZE, PAGE_LIMIT)
        collection_path = self._get_collection_path(collection)

        if not collection_path.exists():
            return []

        keys: list[str] = []
        for file_path in collection_path.iterdir():
            if file_path.is_file() and file_path.suffix == ".json":
                keys.append(file_path.stem)
                if len(keys) >= limit:
                    break

        return keys

    @override
    async def _get_collection_names(self, *, limit: int | None = None) -> list[str]:
        """List all collection names.

        Args:
            limit: Maximum number of collections to return.

        Returns:
            A list of collection names.
        """
        limit = min(limit or DEFAULT_PAGE_SIZE, PAGE_LIMIT)

        collections: list[str] = []
        for dir_path in self._directory.iterdir():
            if dir_path.is_dir():
                collections.append(dir_path.name)
                if len(collections) >= limit:
                    break

        return collections

    @override
    async def _delete_collection(self, *, collection: str) -> bool:
        """Delete an entire collection and all its keys.

        Args:
            collection: The collection name.

        Returns:
            True if the collection was deleted, False if it didn't exist or an error occurred.
        """
        collection_path = self._get_collection_path(collection)

        if not collection_path.exists():
            return False

        try:
            # Delete all files in the collection
            for file_path in collection_path.iterdir():
                if file_path.is_file():
                    file_path.unlink()

            # Delete the collection directory
            collection_path.rmdir()
        except OSError:
            return False
        else:
            return True

    @override
    async def _delete_store(self) -> bool:
        """Delete the entire store and all its collections.

        Returns:
            True if the store was deleted successfully.
        """
        try:
            # Delete all collections
            for collection_path in self._directory.iterdir():
                if collection_path.is_dir():
                    # Delete all files in the collection
                    for file_path in collection_path.iterdir():
                        if file_path.is_file():
                            file_path.unlink()
                    # Delete the collection directory
                    collection_path.rmdir()
        except OSError:
            return False
        else:
            return True
```

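To illustrate the path traversal protection the commit message calls out, the sketch below exercises the private `_get_key_path` helper directly. It is not part of the commit, and it reaches into underscore-prefixed internals purely for demonstration:

```python
import tempfile

from key_value.aio.stores.filetree import FileTreeStore

with tempfile.TemporaryDirectory() as base:
    store = FileTreeStore(directory=base)

    # Well-formed names resolve to {base_directory}/{collection}/{key}.json.
    print(store._get_key_path("users", "alice"))  # .../users/alice.json

    # Names that would resolve outside the base directory raise ValueError.
    try:
        store._get_key_path("users", "../../outside")
    except ValueError as exc:
        print(exc)  # Invalid key name: '../../outside' would escape base directory
```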