1818
1919
2020class DuckDBSerializationAdapter (SerializationAdapter ):
21- """Adapter for DuckDB with support for native JSON and TEXT storage modes ."""
21+ """Adapter for DuckDB with native JSON storage."""
2222
23- _native_storage : bool
24-
25- def __init__ (self , * , native_storage : bool = True ) -> None :
26- """Initialize the DuckDB adapter.
27-
28- Args:
29- native_storage: If True, use JSON column for native dict storage.
30- If False, use TEXT column for stringified JSON.
31- """
23+ def __init__ (self ) -> None :
24+ """Initialize the DuckDB adapter."""
3225 super ().__init__ ()
3326
34- self ._native_storage = native_storage
3527 self ._date_format = "datetime"
36- # Always use string format - DuckDB needs JSON strings for both TEXT and JSON columns
28+ # Use string format - DuckDB needs JSON strings for JSON columns
3729 self ._value_format = "string"
3830
3931 @override
4032 def prepare_dump (self , data : dict [str , Any ]) -> dict [str , Any ]:
4133 """Prepare data for dumping to DuckDB.
4234
43- Moves the value to the appropriate column (value_dict or value_json)
44- and sets the other column to None. Also includes version, key, and collection
45- fields in the JSON for compatibility with deserialization.
35+ Stores the value in the value_dict JSON column and includes version, key,
36+ and collection fields in the JSON for compatibility with deserialization.
4637 """
4738 value = data .pop ("value" )
4839
@@ -51,7 +42,7 @@ def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]:
5142 key = data .pop ("key" , None )
5243 collection_name = data .pop ("collection" , None )
5344
54- # Build the document to store in JSON columns
45+ # Build the document to store in JSON column
5546 json_document : dict [str , Any ] = {"value" : value }
5647
5748 if version is not None :
@@ -61,32 +52,23 @@ def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]:
6152 if collection_name is not None :
6253 json_document ["collection" ] = collection_name
6354
64- # Set both columns to None, then populate the appropriate one
65- data ["value_json" ] = None
66- data ["value_dict" ] = None
67-
68- if self ._native_storage :
69- # For native storage, convert the document to JSON string for DuckDB's JSON column
70- # DuckDB will parse it and store it as native JSON
71- data ["value_dict" ] = json .dumps (json_document )
72- else :
73- # For TEXT storage, store as JSON string
74- data ["value_json" ] = json .dumps (json_document )
55+ # Store as JSON string for DuckDB's JSON column
56+ # DuckDB will parse it and store it as native JSON
57+ data ["value_dict" ] = json .dumps (json_document )
7558
7659 return data
7760
7861 @override
7962 def prepare_load (self , data : dict [str , Any ]) -> dict [str , Any ]:
8063 """Prepare data loaded from DuckDB for conversion to ManagedEntry.
8164
82- Extracts value, version, key, and collection from the JSON columns
65+ Extracts value, version, key, and collection from the JSON column
8366 and handles timezone conversion for DuckDB's naive timestamps.
8467 """
85- value_json = data .pop ("value_json" , None )
8668 value_dict = data .pop ("value_dict" , None )
8769
88- # Parse the JSON document from the appropriate column
89- json_document = self ._parse_json_column (value_dict , value_json )
70+ # Parse the JSON document from the value_dict column
71+ json_document = self ._parse_json_column (value_dict )
9072
9173 # Extract fields from the JSON document
9274 data ["value" ] = json_document .get ("value" )
@@ -102,27 +84,20 @@ def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]:
10284
10385 return data
10486
105- def _parse_json_column (self , value_dict : Any , value_json : Any ) -> dict [str , Any ]: # noqa: ANN401
106- """Parse JSON from value_dict or value_json column."""
107- if value_dict is not None :
108- # Native storage mode - value_dict can be dict or string (DuckDB JSON returns as string)
109- if isinstance (value_dict , dict ):
110- return cast (dict [str , Any ], value_dict )
111- if isinstance (value_dict , str ):
112- parsed : dict [str , Any ] = json .loads (value_dict )
113- return parsed
114- msg = f"value_dict has unexpected type: { type (value_dict )} "
87+ def _parse_json_column (self , value_dict : Any ) -> dict [str , Any ]:
88+ """Parse JSON from value_dict column."""
89+ if value_dict is None :
90+ msg = "value_dict column contains no data"
11591 raise DeserializationError (message = msg )
11692
117- if value_json is not None :
118- # Stringified JSON mode - parse from string
119- if isinstance (value_json , str ):
120- parsed_json : dict [str , Any ] = json .loads (value_json )
121- return parsed_json
122- msg = f"value_json has unexpected type: { type (value_json )} "
123- raise DeserializationError (message = msg )
93+ # value_dict can be dict or string (DuckDB JSON returns as string)
94+ if isinstance (value_dict , dict ):
95+ return cast ("dict[str, Any]" , value_dict )
96+ if isinstance (value_dict , str ):
97+ parsed : dict [str , Any ] = json .loads (value_dict )
98+ return parsed
12499
125- msg = "Neither value_dict nor value_json column contains data "
100+ msg = f" value_dict has unexpected type: { type ( value_dict ) } "
126101 raise DeserializationError (message = msg )
127102
128103 def _convert_timestamps_to_utc (self , data : dict [str , Any ]) -> None :
@@ -146,9 +121,8 @@ class DuckDBStore(BaseContextManagerStore, BaseStore):
146121 The store uses native DuckDB types (JSON, TIMESTAMP) to enable efficient SQL queries
147122 on stored data. Users can query the database directly for analytics or data exploration.
148123
149- Storage modes:
150- - native_storage=True: Stores values in a JSON column as native dicts for queryability
151- - native_storage=False: Stores values as stringified JSON in a TEXT column
124+ Values are stored in a JSON column as native dicts, allowing direct SQL queries
125+ on the stored data for analytics and reporting.
152126
153127 Note on connection ownership: When you provide an existing connection, the store
154128 will take ownership and close it when the store is closed or garbage collected.
@@ -167,7 +141,6 @@ def __init__(
167141 * ,
168142 connection : duckdb .DuckDBPyConnection ,
169143 table_name : str = "kv_entries" ,
170- native_storage : bool = True ,
171144 default_collection : str | None = None ,
172145 seed : SEED_DATA_TYPE | None = None ,
173146 ) -> None :
@@ -180,8 +153,6 @@ def __init__(
180153 Args:
181154 connection: An existing DuckDB connection to use.
182155 table_name: Name of the table to store key-value entries. Defaults to "kv_entries".
183- native_storage: If True, use native JSON column for dict storage; if False, use TEXT for stringified JSON.
184- Default is True for better queryability and native type support.
185156 default_collection: The default collection to use if no collection is provided.
186157 seed: Optional seed data to pre-populate the store.
187158 """
@@ -192,7 +163,6 @@ def __init__(
192163 * ,
193164 database_path : Path | str | None = None ,
194165 table_name : str = "kv_entries" ,
195- native_storage : bool = True ,
196166 default_collection : str | None = None ,
197167 seed : SEED_DATA_TYPE | None = None ,
198168 ) -> None :
@@ -201,8 +171,6 @@ def __init__(
201171 Args:
202172 database_path: Path to the database file. If None or ':memory:', uses in-memory database.
203173 table_name: Name of the table to store key-value entries. Defaults to "kv_entries".
204- native_storage: If True, use native JSON column for dict storage; if False, use TEXT for stringified JSON.
205- Default is True for better queryability and native type support.
206174 default_collection: The default collection to use if no collection is provided.
207175 seed: Optional seed data to pre-populate the store.
208176 """
@@ -213,7 +181,6 @@ def __init__(
213181 connection : duckdb .DuckDBPyConnection | None = None ,
214182 database_path : Path | str | None = None ,
215183 table_name : str = "kv_entries" ,
216- native_storage : bool = True ,
217184 default_collection : str | None = None ,
218185 seed : SEED_DATA_TYPE | None = None ,
219186 ) -> None :
@@ -223,8 +190,6 @@ def __init__(
223190 connection: An existing DuckDB connection to use.
224191 database_path: Path to the database file. If None or ':memory:', uses in-memory database.
225192 table_name: Name of the table to store key-value entries. Defaults to "kv_entries".
226- native_storage: If True, use native JSON column for dict storage; if False, use TEXT for stringified JSON.
227- Default is True for better queryability and native type support.
228193 default_collection: The default collection to use if no collection is provided.
229194 seed: Optional seed data to pre-populate the store.
230195 """
@@ -248,7 +213,7 @@ def __init__(
248213 self ._owns_connection = True
249214
250215 self ._is_closed = False
251- self ._adapter = DuckDBSerializationAdapter (native_storage = native_storage )
216+ self ._adapter = DuckDBSerializationAdapter ()
252217 self ._table_name = table_name
253218 self ._stable_api = False
254219
@@ -264,8 +229,7 @@ def _get_create_table_sql(self) -> str:
264229 CREATE TABLE IF NOT EXISTS { self ._table_name } (
265230 collection VARCHAR NOT NULL,
266231 key VARCHAR NOT NULL,
267- value_json TEXT,
268- value_dict JSON,
232+ value_dict JSON NOT NULL,
269233 created_at TIMESTAMP,
270234 expires_at TIMESTAMP,
271235 PRIMARY KEY (collection, key)
@@ -301,7 +265,7 @@ def _get_select_sql(self) -> str:
301265 SQL SELECT statement with placeholders.
302266 """
303267 return f"""
304- SELECT value_json, value_dict, created_at, expires_at
268+ SELECT value_dict, created_at, expires_at
305269 FROM { self ._table_name }
306270 WHERE collection = ? AND key = ?
307271 """ # noqa: S608
@@ -314,8 +278,8 @@ def _get_insert_sql(self) -> str:
314278 """
315279 return f"""
316280 INSERT OR REPLACE INTO { self ._table_name }
317- (collection, key, value_json, value_dict, created_at, expires_at)
318- VALUES (?, ?, ?, ?, ?, ? )
281+ (collection, key, value_dict, created_at, expires_at)
282+ VALUES (?, ?, ?, ?, ?)
319283 """ # noqa: S608
320284
321285 def _get_delete_sql (self ) -> str :
@@ -335,17 +299,15 @@ async def _setup(self) -> None:
335299 """Initialize the database schema for key-value storage.
336300
337301 The schema uses native DuckDB types for efficient querying:
338- - value_json: TEXT column storing stringified JSON (used when native_storage=False)
339- - value_dict: JSON column storing native dicts (used when native_storage=True)
302+ - value_dict: JSON column storing native dicts for queryability
340303 - created_at: TIMESTAMP for native datetime operations
341304 - expires_at: TIMESTAMP for native expiration queries
342305
343- This design follows the Elasticsearch/MongoDB pattern of separating native and stringified
344- storage, enabling:
345- - Direct SQL queries on the database for analytics (when using native storage)
306+ This design enables:
307+ - Direct SQL queries on the database for analytics
346308 - Efficient expiration cleanup: DELETE FROM table WHERE expires_at < now()
347309 - Metadata queries without JSON deserialization
348- - Flexibility to choose between native dict storage and stringified JSON
310+ - Native JSON column support for rich querying capabilities
349311 """
350312 # Create the main table for storing key-value entries
351313 self ._connection .execute (self ._get_create_table_sql ())
@@ -360,7 +322,7 @@ async def _setup(self) -> None:
360322 async def _get_managed_entry (self , * , key : str , collection : str ) -> ManagedEntry | None :
361323 """Retrieve a managed entry by key from the specified collection.
362324
363- Reconstructs the ManagedEntry from value columns and metadata columns
325+ Reconstructs the ManagedEntry from value column and metadata columns
364326 using the serialization adapter.
365327 """
366328 if self ._is_closed :
@@ -375,11 +337,10 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry
375337 if result is None :
376338 return None
377339
378- value_json , value_dict , created_at , expires_at = result
340+ value_dict , created_at , expires_at = result
379341
380- # Build document dict for the adapter (exclude None values)
342+ # Build document dict for the adapter
381343 document : dict [str , Any ] = {
382- "value_json" : value_json ,
383344 "value_dict" : value_dict ,
384345 }
385346
@@ -411,15 +372,14 @@ async def _put_managed_entry(
411372 raise RuntimeError (msg )
412373
413374 # Use adapter to dump the managed entry to a dict with key and collection
414- document = self ._adapter .dump_dict (entry = managed_entry , exclude_none = False , key = key , collection = collection )
375+ document = self ._adapter .dump_dict (entry = managed_entry , key = key , collection = collection )
415376
416377 # Insert or replace the entry with metadata in separate columns
417378 self ._connection .execute (
418379 self ._get_insert_sql (),
419380 [
420381 collection ,
421382 key ,
422- document ["value_json" ],
423383 document ["value_dict" ],
424384 document .get ("created_at" ),
425385 document .get ("expires_at" ),
0 commit comments