Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to toggle async io in open() #47

Merged
merged 5 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,39 @@ About

TBD

Usage Example
============

Reading
------------

.. code-block:: python
import aerovaldb

with aerovaldb.open('json_files:path/to/data/') as db:
try:
fh = db.get_map(*args, access_type=aerovaldb.AccessType.FILE_PATH)
# ... sendfile of filehandle
except FileNotFoundError as e:
json = db.get_map(*args, access_type=aerovaldb.AccessType.JSON_STR)
# ... send json string to client

Writing
------------

.. code-block:: python
import aerovaldb
import json

with aerovaldb.open('json_files:path/to/data/') as db:
db.put
obj = {"data": "Some test data"}
json_str = "{ 'data': 'Some test data' }"
db.put_map(json_str) # String is assumed to be json string and stored directly.

db.put_map(obj) # Otherwise serialized object is stored.


.. toctree::
:maxdepth: 1
:caption: Contents:
Expand Down
59 changes: 59 additions & 0 deletions scripts/aiofile_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import aiofile
import time
import random
import os
import aiofile
import asyncio


def generate_test_file(fp: str, /, size: int):
with open(fp, "w") as f:
for i in range(size):
bytes = random.randbytes(1024)
f.write(bytes.hex())


def generate_test_files():
if not os.path.exists("test-file-1kb"):
print("Generate 1kb test file")
generate_test_file("test-file-1kb", 1)

if not os.path.exists("test-file-100kb"):
print("Generate 100kb test file")
generate_test_file("test-file-100kb", 100)

if not os.path.exists("test-file-10000kb"):
print("Generate 10000kb test file")
generate_test_file("test-file-10000kb", 100000)

if not os.path.exists("test-file-250000kb"):
print("Generate 250000kb test file")
generate_test_file("test-file-250000kb", 250000)


generate_test_files()

for fp in (
"test-file-1kb",
"test-file-100kb",
"test-file-10000kb",
"test-file-250000kb",
):
print(f"Testing file {fp}")
print(f" Testing synchronous read")
start_time = time.perf_counter()
for _ in range(10):
with open(fp, "r") as f:
f.read()
print(f" Time elapsed: {time.perf_counter()-start_time:.2f}s")

async def async_read(fpath: str):
for _ in range(10):
async with aiofile.async_open(fpath) as f:
await f.read()

print(f" Testing asynchronous read")
start_time = time.perf_counter()
for _ in range(10):
asyncio.run(async_read(fp))
print(f" Time elapsed: {time.perf_counter()-start_time:.2f}s")
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = aerovaldb
version = 0.0.6.dev0
version = 0.0.7.dev0
author = Augustin Mortier, Thorbjørn Lundin, Heiko Klein
author_email = Heiko.Klein@met.no
description = aeroval database to communicate between pyaerocom and aeroval
Expand Down
11 changes: 8 additions & 3 deletions src/aerovaldb/jsondb/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,13 @@ class JSONLRUCache:
Implements an in-memory LRU cache for file content in aerovaldb.
"""

def __init__(self, *, max_size: int):
def __init__(self, *, max_size: int, asyncio: bool = False):
"""
:param max_size : The maximum size of the cache in terms of number of entries / files.

Files will be ejected based on least recently used, when full.
"""
self._asyncio = asyncio
self._max_size = max_size
self.invalidate_all()

Expand Down Expand Up @@ -123,8 +124,12 @@ def _canonical_file_path(self, file_path: str | Path) -> str:
async def _read_json(self, file_path: str | Path) -> str:
abspath = self._canonical_file_path(file_path)
logger.debug(f"Reading file {abspath}")
async with aiofile.async_open(abspath, "r") as f:
return await f.read()
if self._asyncio:
async with aiofile.async_open(abspath, "r") as f:
return await f.read()

with open(abspath, "r") as f:
return f.read()

def _get(self, abspath: str) -> str:
"""Returns an element from the cache."""
Expand Down
9 changes: 7 additions & 2 deletions src/aerovaldb/jsondb/jsonfiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,13 @@


class AerovalJsonFileDB(AerovalDB):
def __init__(self, basedir: str | Path):
self._cache = JSONLRUCache(max_size=64)
def __init__(self, basedir: str | Path, /, use_async: bool = False):
"""
:param basedir The root directory where aerovaldb will look for files.
:param asyncio Whether to use asynchronous io to read and store files.
"""
self._asyncio = use_async
self._cache = JSONLRUCache(max_size=64, asyncio=self._asyncio)

self._basedir = os.path.realpath(basedir)
if isinstance(self._basedir, str):
Expand Down
6 changes: 4 additions & 2 deletions src/aerovaldb/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ def list_engines() -> dict[str, AerovalDB]:
return _build_db_engines(entrypoints)


def open(resource) -> AerovalDB:
def open(resource, /, use_async: bool = False) -> AerovalDB:
"""open an AerovalDB directly, sending args and kwargs
directly to the `AervoalDB()` function

:param resource: the resource-name for the database. The resource can be
- 'entrypoint:path', with path being the location where the database should be generated
- 'path', with path containing either an aerovaldb.cfg configuration
- or path being a json_files dabasase
:param use_async : If true, aiofile will be used to read files, otherwise files will be read
synchronously.
:return: an implementation-object of AerovalDB openend to a location
"""

Expand All @@ -66,4 +68,4 @@ def open(resource) -> AerovalDB:

aerodb = list_engines()[name]

return aerodb(path) # type: ignore
return aerodb(path, use_async=use_async) # type: ignore
4 changes: 2 additions & 2 deletions tests/jsondb/test_jsonfiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expecte
"""
This test tests that data is read as expected from a static, fixed database.
"""
with aerovaldb.open(resource) as db:
with aerovaldb.open(resource, use_async=True) as db:
f = getattr(db, fun)

if kwargs is not None:
Expand All @@ -183,7 +183,7 @@ async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expecte
@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",)))
@pytest.mark.parametrize(*get_parameters)
def test_getter_sync(resource: str, fun: str, args: list, kwargs: dict, expected):
with aerovaldb.open(resource) as db:
with aerovaldb.open(resource, use_async=False) as db:
f = getattr(db, fun)

if kwargs is not None:
Expand Down