Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add orjson options in JSONStore #791

Merged
merged 5 commits into from
May 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ uvicorn==0.18.3
sshtunnel==0.4.0
msgpack==1.0.3
msgpack-python==0.5.6
orjson==3.8.0
orjson==3.8.14
boto3==1.24.42
python-dateutil==2.8.2
pydantic
Expand Down
16 changes: 9 additions & 7 deletions src/maggma/stores/gridfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""
Module containing various definitions of Stores.
Stores are a default access pattern to data and provide
various utillities
various utilities
"""

import copy
Expand Down Expand Up @@ -81,7 +81,7 @@ def __init__(
self.port = port
self.username = username
self.password = password
self._coll = None # type: Any
self._coll: Any = None
self.compression = compression
self.ensure_metadata = ensure_metadata
self.searchable_fields = [] if searchable_fields is None else searchable_fields
Expand Down Expand Up @@ -162,7 +162,9 @@ def connect(self, force_reset: bool = False):
def _collection(self):
"""Property referring to underlying pymongo collection"""
if self._coll is None:
raise StoreError("Must connect Mongo-like store before attemping to use it")
raise StoreError(
"Must connect Mongo-like store before attempting to use it"
)
return self._coll

@property
Expand Down Expand Up @@ -244,7 +246,6 @@ def query(
if properties is not None and prop_keys.issubset(set(doc.keys())):
yield {p: doc[p] for p in properties if p in doc}
else:

metadata = doc.get("metadata", {})

data = self._collection.find_one(
Expand Down Expand Up @@ -354,7 +355,7 @@ def groupby(

def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool:
"""
Tries to create an index and return true if it suceeded
Tries to create an index and return true if it succeeded
Currently operates on the GridFS files collection
Args:
key: single key to index
Expand Down Expand Up @@ -447,7 +448,8 @@ def remove_docs(self, criteria: Dict):
self._collection.delete(_id)

def close(self):
self._collection.database.client.close()
self._files_store.close()
self._coll = None
if self.ssh_tunnel is not None:
self.ssh_tunnel.stop()

Expand Down Expand Up @@ -507,7 +509,7 @@ def __init__(
self.database = database

self.collection_name = collection_name
self._coll = None # type: Any
self._coll: Any = None
self.compression = compression
self.ensure_metadata = ensure_metadata
self.searchable_fields = [] if searchable_fields is None else searchable_fields
Expand Down
16 changes: 14 additions & 2 deletions src/maggma/stores/mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from itertools import chain, groupby
from socket import socket
import warnings
from typing import Dict, Iterator, List, Optional, Tuple, Union
from typing import Dict, Iterator, List, Optional, Tuple, Union, Any

import mongomock
import orjson
Expand Down Expand Up @@ -704,6 +704,8 @@ def __init__(
self,
paths: Union[str, List[str]],
read_only: bool = True,
serialization_option: Optional[int] = None,
serialization_default: Any = None,
**kwargs,
):
"""
Expand All @@ -720,6 +722,10 @@ def __init__(
Note that when read_only=False, JSONStore only supports a single JSON
file. If the file does not exist, it will be automatically created
when the JSONStore is initialized.
serialization_option:
option that will be passed to orjson.dumps when saving to the JSON file.
serialization_default:
default that will be passed to orjson.dumps when saving to the JSON file.
"""
paths = paths if isinstance(paths, (list, tuple)) else [paths]
self.paths = paths
Expand Down Expand Up @@ -755,6 +761,8 @@ def __init__(
f.write(bytesdata.decode("utf-8"))

self.default_sort = None
self.serialization_option = serialization_option
self.serialization_default = serialization_default

super().__init__(**kwargs)

Expand Down Expand Up @@ -838,7 +846,11 @@ def update_json_file(self):
data = [d for d in self.query()]
for d in data:
d.pop("_id")
bytesdata = orjson.dumps(data)
bytesdata = orjson.dumps(
data,
option=self.serialization_option,
default=self.serialization_default,
)
f.write(bytesdata.decode("utf-8"))

def __hash__(self):
Expand Down
14 changes: 11 additions & 3 deletions tests/stores/test_gridfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import numpy.testing.utils as nptu
import pytest
from maggma.core import StoreError

from maggma.stores import GridFSStore, MongoStore
from maggma.stores.gridfs import files_collection_fields, GridFSURIStore
Expand Down Expand Up @@ -218,7 +219,7 @@ def test_index(gridfsstore):

def test_gfs_metadata(gridfsstore):
"""
Ensure metadata is put back in the docuemnt
Ensure metadata is put back in the document
"""
tic = datetime(2018, 4, 12, 16)

Expand All @@ -244,7 +245,6 @@ def test_gridfsstore_from_launchpad_file(lp_file):


def test_searchable_fields(gridfsstore):

tic = datetime(2018, 4, 12, 16)

data = [
Expand All @@ -259,7 +259,6 @@ def test_searchable_fields(gridfsstore):


def test_additional_metadata(gridfsstore):

tic = datetime(2018, 4, 12, 16)

data = [
Expand Down Expand Up @@ -299,3 +298,12 @@ def test_gridfs_uri_dbname_parse():
uri_with_db = "mongodb://uuu:xxxx@host:27017"
with pytest.raises(ConfigurationError):
GridFSURIStore(uri_with_db, "test")


def test_close(gridfsstore):
    """
    Closing the store must make later queries raise StoreError.
    """
    # Before closing, a query on the fixture store comes back empty.
    found = gridfsstore.query_one()
    assert found is None

    gridfsstore.close()

    # After close() the store must refuse to serve queries.
    with pytest.raises(StoreError):
        gridfsstore.query_one()

    # reconnect to allow the drop of the collection in the fixture
    gridfsstore.connect()
24 changes: 23 additions & 1 deletion tests/stores/test_mongolike.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import orjson
import os
import shutil
from datetime import datetime
Expand Down Expand Up @@ -43,7 +44,6 @@ def memorystore():

@pytest.fixture
def jsonstore(test_dir):

files = []
for f in ["a.json", "b.json"]:
files.append(test_dir / "test_set" / f)
Expand Down Expand Up @@ -503,6 +503,28 @@ def test_json_store_writeable(test_dir):
update_json_file_mock.assert_not_called()


def test_jsonstore_orjson_options(test_dir):
    """
    Check that the JSONStore serialization settings reach orjson.

    A float subclass is used as the problematic value: updating a store built
    without serialization settings is expected to raise
    orjson.JSONEncodeError, while the same update on a store built with a
    serialization_default callable is expected to complete without raising.
    """

    class SubFloat(float):
        pass

    def fallback(_obj):
        # Stand-in serializer handed to the store as serialization_default.
        return "test"

    def make_doc():
        # Build a fresh document for each update call.
        return {"wrong_field": SubFloat(1.1), "task_id": 3}

    with ScratchDir("."):
        # Store without serialization settings: the update must fail.
        store = JSONStore("d.json", read_only=False)
        store.connect()
        with pytest.raises(orjson.JSONEncodeError):
            store.update(make_doc())
        store.close()

        # Store with a default callable: the same update must go through.
        store = JSONStore(
            "a.json",
            read_only=False,
            serialization_option=None,
            serialization_default=fallback,
        )
        store.connect()
        store.update(make_doc())
        store.close()


def test_jsonstore_last_updated(test_dir):
# files = []
# for f in ["a.json", "b.json"]:
Expand Down