Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⬆️ UPGRADE: sqlalchemy v1.4 (v2 API) #114

Merged
merged 1 commit into from
Sep 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ jobs:
# No need to run the benchmarks, they will run in a different workflow
# Also, run in very verbose mode so if there is an error we get a complete diff
run: pytest -vv --cov=disk_objectstore --benchmark-skip
env:
SQLALCHEMY_WARN_20: 1
- name: Create xml coverage
run: coverage xml
- name: Upload coverage to Codecov
Expand Down
6 changes: 4 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ repos:
hooks:
- id: pylint
additional_dependencies:
- sqlalchemy<1.4
- sqlalchemy==1.4.22
- click==8.0.1
- memory-profiler==0.58.0
- profilehooks==1.12.0
Expand All @@ -71,5 +71,7 @@ repos:
rev: v0.910
hooks:
- id: mypy
additional_dependencies: [typing-extensions]
additional_dependencies:
- "sqlalchemy[mypy]==1.4.22"
- typing-extensions
files: ^(disk_objectstore/.*py)$
256 changes: 111 additions & 145 deletions disk_objectstore/container.py

Large diffs are not rendered by default.

94 changes: 94 additions & 0 deletions disk_objectstore/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Models for the container index file (SQLite DB)."""
import os
from typing import Optional

from sqlalchemy import Boolean, Column, Integer, String, create_engine, event
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import text

Base = declarative_base() # pylint: disable=invalid-name,useless-suppression


class Obj(Base): # pylint: disable=too-few-public-methods
"""The main (and only) table to store object metadata (hashkey, offset, length, ...)."""

__tablename__ = "db_object"

id = Column(Integer, primary_key=True) # pylint: disable=invalid-name

# Important: there are parts of the code that rely on the fact that this field is unique.
# If you really do not want a uniqueness field, you will need to adapt the code.
hashkey = Column(String, nullable=False, unique=True, index=True)
compressed = Column(Boolean, nullable=False)
size = Column(
Integer, nullable=False
) # uncompressed size; if uncompressed, size == length
offset = Column(Integer, nullable=False)
length = Column(Integer, nullable=False)
pack_id = Column(
Integer, nullable=False
) # integer ID of the pack in which this entry is stored


def get_session(
path: str, create: bool = False, raise_if_missing: bool = False
) -> Optional[Session]:
"""Return a new session to connect to the pack-index SQLite DB.

:param create: if True, creates the sqlite file and schema.
:param raise_if_missing: ignored if create==True. If create==False, and the index file
is missing, either raise an exception (FileNotFoundError) if this flag is True, or return None
"""
if not create and not os.path.exists(path):
if raise_if_missing:
raise FileNotFoundError("Pack index does not exist")
return None

engine = create_engine(f"sqlite:///{path}", future=True)

# For the next two bindings, see background on
# https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
@event.listens_for(engine, "connect")
def do_connect(dbapi_connection, _):
"""Hook function that is called upon connection.

It modifies the default behavior of SQLite to use WAL and to
go back to the 'default' isolation level mode.
"""
# disable pysqlite's emitting of the BEGIN statement entirely.
# also stops it from emitting COMMIT before any DDL.
dbapi_connection.isolation_level = None
# Open the file in WAL mode (see e.g. https://stackoverflow.com/questions/9671490)
# This allows to have as many readers as one wants, and a concurrent writer (up to one)
# Note that this writes on a journal, on a different packs.idx-wal,
# and also creates a packs.idx-shm file.
# Note also that when the session is created, you will keep reading from the same version,
# so you need to close and reload the session to see the newly written data.
# Docs on WAL: https://www.sqlite.org/wal.html
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA journal_mode=wal;")
cursor.close()

# For this binding, see background on
# https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
@event.listens_for(engine, "begin")
def do_begin(conn): # pylint: disable=unused-variable
# emit our own BEGIN
conn.execute(text("BEGIN"))

if create:
# Create all tables in the engine. This is equivalent to "Create Table"
# statements in raw SQL.
Base.metadata.create_all(engine)

# Bind the engine to the metadata of the Base class so that the
# declaratives can be accessed through a DBSession instance
Base.metadata.bind = engine

# We set autoflush = False to avoid to lock the DB if just doing queries/reads
session = sessionmaker(
bind=engine, autoflush=False, autocommit=False, future=True
)()

return session
26 changes: 0 additions & 26 deletions disk_objectstore/models.py

This file was deleted.

5 changes: 2 additions & 3 deletions disk_objectstore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
Iterable,
Iterator,
Optional,
Sequence,
Tuple,
Type,
Union,
)
from zlib import error

from sqlalchemy.engine.result import ResultProxy

from .exceptions import ClosingNotAllowed, ModificationNotAllowed

try:
Expand Down Expand Up @@ -1265,7 +1264,7 @@ def detect_where_sorted( # pylint: disable=too-many-branches, too-many-statemen
now_left = new_now_left


def yield_first_element(iterator: Union[ResultProxy, zip]) -> Iterator[Union[str, int]]:
def yield_first_element(iterator: Iterable[Sequence]) -> Iterator[Any]:
"""Given an iterator that returns a tuple, return an iterator that yields only the first element of the tuple."""
for elem in iterator:
yield elem[0]
Expand Down
2 changes: 1 addition & 1 deletion performance-benchmarks/validation-calls/performance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from sqlalchemy import func
from tqdm import tqdm


from disk_objectstore.models import Obj
from disk_objectstore.database import Obj
from disk_objectstore import Container
from disk_objectstore.utils import PackedObjectReader, StreamDecompresser, get_hash
c = Container('test-newrepo-sdb/')
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ check_untyped_defs = true
scripts_are_modules = true
warn_unused_ignores = true
warn_redundant_casts = true
plugins = ["sqlalchemy.ext.mypy.plugin"]
5 changes: 4 additions & 1 deletion requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ distlib==0.3.2
# via virtualenv
filelock==3.0.12
# via virtualenv
greenlet==1.1.1
# via sqlalchemy
identify==2.2.13
# via pre-commit
importlib-metadata==4.7.1
Expand All @@ -28,6 +30,7 @@ importlib-metadata==4.7.1
# pluggy
# pre-commit
# pytest
# sqlalchemy
# virtualenv
iniconfig==1.1.1
# via pytest
Expand Down Expand Up @@ -68,7 +71,7 @@ pyyaml==5.4.1
# via pre-commit
six==1.16.0
# via virtualenv
sqlalchemy==1.3.24
sqlalchemy==1.4.23
# via disk-objectstore (setup.py)
toml==0.10.2
# via
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ keywords = object store, repository, file store
[options]
packages = find:
install_requires =
sqlalchemy<1.4
sqlalchemy~=1.4.22
typing-extensions;python_version < '3.8'
python_requires = ~=3.7
include_package_data = True
Expand Down
25 changes: 11 additions & 14 deletions tests/concurrent_tests/periodic_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@

import click
import psutil
from sqlalchemy.sql.expression import select

from disk_objectstore.container import Container, NotExistent, ObjectType
from disk_objectstore.models import Obj
from disk_objectstore.database import Obj

MAX_RETRIES_NO_PERM = 1000

Expand Down Expand Up @@ -348,19 +349,15 @@ def main(
session = (
container._get_cached_session() # pylint: disable=protected-access
)
query = (
session.query(Obj)
.filter(Obj.hashkey == key)
.with_entities(
Obj.pack_id,
Obj.hashkey,
Obj.offset,
Obj.length,
Obj.compressed,
Obj.size,
)
)
print(list(query))
stmt = select(
Obj.pack_id,
Obj.hashkey,
Obj.offset,
Obj.length,
Obj.compressed,
Obj.size,
).where(Obj.hashkey == key)
print(list(session.execute(stmt)))
raise


Expand Down
8 changes: 4 additions & 4 deletions tests/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pytest

import disk_objectstore.exceptions as exc
from disk_objectstore import CompressMode, Container, ObjectType, models, utils
from disk_objectstore import CompressMode, Container, ObjectType, database, utils

COMPRESSION_ALGORITHMS_TO_TEST = ["zlib+1", "zlib+9"]

Expand Down Expand Up @@ -2260,9 +2260,9 @@ def test_validate_overlapping_packed(temp_container): # pylint: disable=invalid
assert not any(errors.values())

# Change the offset of the second object so that it's overlapping
temp_container._get_cached_session().query(models.Obj).filter(
models.Obj.hashkey == hashkey_second
).update({models.Obj.offset: models.Obj.offset - 1})
temp_container._get_cached_session().query(database.Obj).filter(
database.Obj.hashkey == hashkey_second
).update({database.Obj.offset: database.Obj.offset - 1})

errors = temp_container.validate()
problems = errors.pop("overlapping_packed")
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ extras =
dev
deps =
black
setenv =
SQLALCHEMY_WARN_20 = 1
commands = pytest {posargs}