From d7d14e91c2dce472af6a09373f88395271b741ce Mon Sep 17 00:00:00 2001 From: Guido Petretto Date: Mon, 29 May 2023 09:58:03 +0200 Subject: [PATCH 1/5] Add orjson options in JSONStore --- src/maggma/stores/gridfs.py | 14 ++++++++------ src/maggma/stores/mongolike.py | 16 ++++++++++++++-- tests/stores/test_gridfs.py | 14 +++++++++++--- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py index 720776b40..16019b6d7 100644 --- a/src/maggma/stores/gridfs.py +++ b/src/maggma/stores/gridfs.py @@ -2,7 +2,7 @@ """ Module containing various definitions of Stores. Stores are a default access pattern to data and provide -various utillities +various utilities """ import copy @@ -11,7 +11,7 @@ from ruamel import yaml from datetime import datetime from pymongo.errors import ConfigurationError -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from typing import Dict, Iterator, List, Optional, Tuple, Union import gridfs from monty.json import jsanitize @@ -162,7 +162,9 @@ def connect(self, force_reset: bool = False): def _collection(self): """Property referring to underlying pymongo collection""" if self._coll is None: - raise StoreError("Must connect Mongo-like store before attemping to use it") + raise StoreError( + "Must connect Mongo-like store before attempting to use it" + ) return self._coll @property @@ -244,7 +246,6 @@ def query( if properties is not None and prop_keys.issubset(set(doc.keys())): yield {p: doc[p] for p in properties if p in doc} else: - metadata = doc.get("metadata", {}) data = self._collection.find_one( @@ -354,7 +355,7 @@ def groupby( def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool: """ - Tries to create an index and return true if it suceeded + Tries to create an index and return true if it succeeded Currently operators on the GridFS files collection Args: key: single key to index @@ -447,7 +448,8 @@ def remove_docs(self, criteria: Dict): 
self._collection.delete(_id) def close(self): - self._collection.database.client.close() + self._files_store.close() + self._coll = None if self.ssh_tunnel is not None: self.ssh_tunnel.stop() diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index 4633d422a..4f1a1517f 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -10,7 +10,7 @@ from itertools import chain, groupby from socket import socket import warnings -from typing import Dict, Iterator, List, Optional, Tuple, Union +from typing import Dict, Iterator, List, Optional, Tuple, Union, Any import mongomock import orjson @@ -704,6 +704,8 @@ def __init__( self, paths: Union[str, List[str]], read_only: bool = True, + serialization_option: int | None = None, + serialization_default: Any | None = None, **kwargs, ): """ @@ -720,6 +722,10 @@ def __init__( Note that when read_only=False, JSONStore only supports a single JSON file. If the file does not exist, it will be automatically created when the JSONStore is initialized. + serialization_option: + option that will be passed to orjson.dumps when saving the data to the JSON file. + serialization_default: + default that will be passed to orjson.dumps when saving the data to the JSON file. 
""" paths = paths if isinstance(paths, (list, tuple)) else [paths] self.paths = paths @@ -755,6 +761,8 @@ def __init__( f.write(bytesdata.decode("utf-8")) self.default_sort = None + self.serialization_option = serialization_option + self.serialization_default = serialization_default super().__init__(**kwargs) @@ -838,7 +846,11 @@ def update_json_file(self): data = [d for d in self.query()] for d in data: d.pop("_id") - bytesdata = orjson.dumps(data) + bytesdata = orjson.dumps( + data, + option=self.serialization_option, + default=self.serialization_default, + ) f.write(bytesdata.decode("utf-8")) def __hash__(self): diff --git a/tests/stores/test_gridfs.py b/tests/stores/test_gridfs.py index a6c682e6b..65d6540e2 100644 --- a/tests/stores/test_gridfs.py +++ b/tests/stores/test_gridfs.py @@ -6,6 +6,7 @@ import numpy as np import numpy.testing.utils as nptu import pytest +from maggma.core import StoreError from maggma.stores import GridFSStore, MongoStore from maggma.stores.gridfs import files_collection_fields, GridFSURIStore @@ -218,7 +219,7 @@ def test_index(gridfsstore): def test_gfs_metadata(gridfsstore): """ - Ensure metadata is put back in the docuemnt + Ensure metadata is put back in the document """ tic = datetime(2018, 4, 12, 16) @@ -244,7 +245,6 @@ def test_gridfsstore_from_launchpad_file(lp_file): def test_searchable_fields(gridfsstore): - tic = datetime(2018, 4, 12, 16) data = [ @@ -259,7 +259,6 @@ def test_searchable_fields(gridfsstore): def test_additional_metadata(gridfsstore): - tic = datetime(2018, 4, 12, 16) data = [ @@ -299,3 +298,12 @@ def test_gridfs_uri_dbname_parse(): uri_with_db = "mongodb://uuu:xxxx@host:27017" with pytest.raises(ConfigurationError): GridFSURIStore(uri_with_db, "test") + + +def test_close(gridfsstore): + assert gridfsstore.query_one() is None + gridfsstore.close() + with pytest.raises(StoreError): + gridfsstore.query_one() + # reconnect to allow the drop of the collection in the fixture + gridfsstore.connect() From 
d8f99c9774c131e404173706a7cbe20ce4784677 Mon Sep 17 00:00:00 2001 From: Guido Petretto Date: Mon, 29 May 2023 10:26:00 +0200 Subject: [PATCH 2/5] add orjson options test --- tests/stores/test_mongolike.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/stores/test_mongolike.py b/tests/stores/test_mongolike.py index d037ce77f..6dd753207 100644 --- a/tests/stores/test_mongolike.py +++ b/tests/stores/test_mongolike.py @@ -1,3 +1,4 @@ +import orjson import os import shutil from datetime import datetime @@ -43,7 +44,6 @@ def memorystore(): @pytest.fixture def jsonstore(test_dir): - files = [] for f in ["a.json", "b.json"]: files.append(test_dir / "test_set" / f) @@ -503,6 +503,28 @@ def test_json_store_writeable(test_dir): update_json_file_mock.assert_not_called() +def test_jsonstore_orjson_options(test_dir): + class SubFloat(float): + pass + + with ScratchDir("."): + jsonstore = JSONStore("d.json", read_only=False) + jsonstore.connect() + with pytest.raises(orjson.JSONEncodeError): + jsonstore.update({"wrong_field": SubFloat(1.1), "task_id": 3}) + jsonstore.close() + + jsonstore = JSONStore( + "a.json", + read_only=False, + serialization_option=None, + serialization_default="test", + ) + jsonstore.connect() + jsonstore.update({"wrong_field": SubFloat(1.1), "task_id": 3}) + jsonstore.close() + + def test_jsonstore_last_updated(test_dir): # files = [] # for f in ["a.json", "b.json"]: From 589624de43b323f2b0eb4119efb7a1e0f25c52ae Mon Sep 17 00:00:00 2001 From: Guido Petretto Date: Mon, 29 May 2023 10:46:51 +0200 Subject: [PATCH 3/5] fix tests --- src/maggma/stores/mongolike.py | 4 ++-- tests/stores/test_mongolike.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index 4f1a1517f..2ee5578ad 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -704,8 +704,8 @@ def __init__( self, paths: Union[str, List[str]], 
read_only: bool = True, - serialization_option: int | None = None, - serialization_default: Any | None = None, + serialization_option: Optional[int] = None, + serialization_default: Any = None, **kwargs, ): """ diff --git a/tests/stores/test_mongolike.py b/tests/stores/test_mongolike.py index 6dd753207..ddc30dcaf 100644 --- a/tests/stores/test_mongolike.py +++ b/tests/stores/test_mongolike.py @@ -518,7 +518,7 @@ class SubFloat(float): "a.json", read_only=False, serialization_option=None, - serialization_default="test", + serialization_default=lambda x: "test", ) jsonstore.connect() jsonstore.update({"wrong_field": SubFloat(1.1), "task_id": 3}) From da131b67a8e12fce626a2b8694c11519c502d4ce Mon Sep 17 00:00:00 2001 From: Guido Petretto Date: Mon, 29 May 2023 11:02:33 +0200 Subject: [PATCH 4/5] fix linting again --- src/maggma/stores/gridfs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py index 16019b6d7..2374d2082 100644 --- a/src/maggma/stores/gridfs.py +++ b/src/maggma/stores/gridfs.py @@ -11,7 +11,7 @@ from ruamel import yaml from datetime import datetime from pymongo.errors import ConfigurationError -from typing import Dict, Iterator, List, Optional, Tuple, Union +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union import gridfs from monty.json import jsanitize @@ -81,7 +81,7 @@ def __init__( self.port = port self.username = username self.password = password - self._coll = None # type: Any + self._coll: Any = None self.compression = compression self.ensure_metadata = ensure_metadata self.searchable_fields = [] if searchable_fields is None else searchable_fields @@ -509,7 +509,7 @@ def __init__( self.database = database self.collection_name = collection_name - self._coll = None # type: Any + self._coll: Any = None self.compression = compression self.ensure_metadata = ensure_metadata self.searchable_fields = [] if searchable_fields is None else searchable_fields 
From 4fa1424b1c8ead96b4b56a6dd69bde05330be018 Mon Sep 17 00:00:00 2001 From: Guido Petretto Date: Mon, 29 May 2023 11:22:51 +0200 Subject: [PATCH 5/5] bump orjson version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3ccbded28..3568d96a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ uvicorn==0.18.3 sshtunnel==0.4.0 msgpack==1.0.3 msgpack-python==0.5.6 -orjson==3.8.0 +orjson==3.8.14 boto3==1.24.42 python-dateutil==2.8.2 pydantic