Skip to content

Commit

Permalink
⬆️ UPGRADE: sqlalchemy v1.4 (v2 API)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisjsewell committed Aug 30, 2021
1 parent f902178 commit e966efd
Show file tree
Hide file tree
Showing 13 changed files with 237 additions and 197 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ jobs:
# No need to run the benchmarks, they will run in a different workflow
# Also, run in very verbose mode so if there is an error we get a complete diff
run: pytest -vv --cov=disk_objectstore --benchmark-skip
env:
SQLALCHEMY_WARN_20: 1
- name: Create xml coverage
run: coverage xml
- name: Upload coverage to Codecov
Expand Down
6 changes: 4 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ repos:
hooks:
- id: pylint
additional_dependencies:
- sqlalchemy<1.4
- sqlalchemy==1.4.22
- click==8.0.1
- memory-profiler==0.58.0
- profilehooks==1.12.0
Expand All @@ -71,5 +71,7 @@ repos:
rev: v0.910
hooks:
- id: mypy
additional_dependencies: [typing-extensions]
additional_dependencies:
- "sqlalchemy[mypy]==1.4.22"
- typing-extensions
files: ^(disk_objectstore/.*py)$
256 changes: 111 additions & 145 deletions disk_objectstore/container.py

Large diffs are not rendered by default.

94 changes: 94 additions & 0 deletions disk_objectstore/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Models for the container index file (SQLite DB)."""
import os
from typing import Optional

from sqlalchemy import Boolean, Column, Integer, String, create_engine, event
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import text

Base = declarative_base() # pylint: disable=invalid-name,useless-suppression


class Obj(Base): # pylint: disable=too-few-public-methods
"""The main (and only) table to store object metadata (hashkey, offset, length, ...)."""

__tablename__ = "db_object"

id = Column(Integer, primary_key=True) # pylint: disable=invalid-name

# Important: there are parts of the code that rely on the fact that this field is unique.
# If you really do not want a uniqueness field, you will need to adapt the code.
hashkey = Column(String, nullable=False, unique=True, index=True)
compressed = Column(Boolean, nullable=False)
size = Column(
Integer, nullable=False
) # uncompressed size; if uncompressed, size == length
offset = Column(Integer, nullable=False)
length = Column(Integer, nullable=False)
pack_id = Column(
Integer, nullable=False
) # integer ID of the pack in which this entry is stored


def get_session(
path: str, create: bool = False, raise_if_missing: bool = False
) -> Optional[Session]:
"""Return a new session to connect to the pack-index SQLite DB.
:param create: if True, creates the sqlite file and schema.
:param raise_if_missing: ignored if create==True. If create==False, and the index file
is missing, either raise an exception (FileNotFoundError) if this flag is True, or return None
"""
if not create and not os.path.exists(path):
if raise_if_missing:
raise FileNotFoundError("Pack index does not exist")
return None

engine = create_engine(f"sqlite:///{path}", future=True)

# For the next two bindings, see background on
# https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
@event.listens_for(engine, "connect")
def do_connect(dbapi_connection, _):
"""Hook function that is called upon connection.
It modifies the default behavior of SQLite to use WAL and to
go back to the 'default' isolation level mode.
"""
# disable pysqlite's emitting of the BEGIN statement entirely.
# also stops it from emitting COMMIT before any DDL.
dbapi_connection.isolation_level = None
# Open the file in WAL mode (see e.g. https://stackoverflow.com/questions/9671490)
# This allows to have as many readers as one wants, and a concurrent writer (up to one)
# Note that this writes on a journal, on a different packs.idx-wal,
# and also creates a packs.idx-shm file.
# Note also that when the session is created, you will keep reading from the same version,
# so you need to close and reload the session to see the newly written data.
# Docs on WAL: https://www.sqlite.org/wal.html
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA journal_mode=wal;")
cursor.close()

# For this binding, see background on
# https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
@event.listens_for(engine, "begin")
def do_begin(conn): # pylint: disable=unused-variable
# emit our own BEGIN
conn.execute(text("BEGIN"))

if create:
# Create all tables in the engine. This is equivalent to "Create Table"
# statements in raw SQL.
Base.metadata.create_all(engine)

# Bind the engine to the metadata of the Base class so that the
# declaratives can be accessed through a DBSession instance
Base.metadata.bind = engine

# We set autoflush = False to avoid to lock the DB if just doing queries/reads
session = sessionmaker(
bind=engine, autoflush=False, autocommit=False, future=True
)()

return session
26 changes: 0 additions & 26 deletions disk_objectstore/models.py

This file was deleted.

5 changes: 2 additions & 3 deletions disk_objectstore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
Iterable,
Iterator,
Optional,
Sequence,
Tuple,
Type,
Union,
)
from zlib import error

from sqlalchemy.engine.result import ResultProxy

from .exceptions import ClosingNotAllowed, ModificationNotAllowed

try:
Expand Down Expand Up @@ -1265,7 +1264,7 @@ def detect_where_sorted( # pylint: disable=too-many-branches, too-many-statemen
now_left = new_now_left


def yield_first_element(iterator: Union[ResultProxy, zip]) -> Iterator[Union[str, int]]:
def yield_first_element(iterator: Iterable[Sequence]) -> Iterator[Any]:
"""Given an iterator that returns a tuple, return an iterator that yields only the first element of the tuple."""
for elem in iterator:
yield elem[0]
Expand Down
2 changes: 1 addition & 1 deletion performance-benchmarks/validation-calls/performance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from sqlalchemy import func
from tqdm import tqdm


from disk_objectstore.models import Obj
from disk_objectstore.database import Obj
from disk_objectstore import Container
from disk_objectstore.utils import PackedObjectReader, StreamDecompresser, get_hash
c = Container('test-newrepo-sdb/')
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ check_untyped_defs = true
scripts_are_modules = true
warn_unused_ignores = true
warn_redundant_casts = true
plugins = ["sqlalchemy.ext.mypy.plugin"]
5 changes: 4 additions & 1 deletion requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ distlib==0.3.2
# via virtualenv
filelock==3.0.12
# via virtualenv
greenlet==1.1.1
# via sqlalchemy
identify==2.2.13
# via pre-commit
importlib-metadata==4.7.1
Expand All @@ -28,6 +30,7 @@ importlib-metadata==4.7.1
# pluggy
# pre-commit
# pytest
# sqlalchemy
# virtualenv
iniconfig==1.1.1
# via pytest
Expand Down Expand Up @@ -68,7 +71,7 @@ pyyaml==5.4.1
# via pre-commit
six==1.16.0
# via virtualenv
sqlalchemy==1.3.24
sqlalchemy==1.4.23
# via disk-objectstore (setup.py)
toml==0.10.2
# via
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ keywords = object store, repository, file store
[options]
packages = find:
install_requires =
sqlalchemy<1.4
sqlalchemy~=1.4.22
typing-extensions;python_version < '3.8'
python_requires = ~=3.7
include_package_data = True
Expand Down
25 changes: 11 additions & 14 deletions tests/concurrent_tests/periodic_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@

import click
import psutil
from sqlalchemy.sql.expression import select

from disk_objectstore.container import Container, NotExistent, ObjectType
from disk_objectstore.models import Obj
from disk_objectstore.database import Obj

MAX_RETRIES_NO_PERM = 1000

Expand Down Expand Up @@ -348,19 +349,15 @@ def main(
session = (
container._get_cached_session() # pylint: disable=protected-access
)
query = (
session.query(Obj)
.filter(Obj.hashkey == key)
.with_entities(
Obj.pack_id,
Obj.hashkey,
Obj.offset,
Obj.length,
Obj.compressed,
Obj.size,
)
)
print(list(query))
stmt = select(
Obj.pack_id,
Obj.hashkey,
Obj.offset,
Obj.length,
Obj.compressed,
Obj.size,
).where(Obj.hashkey == key)
print(list(session.execute(stmt)))
raise


Expand Down
8 changes: 4 additions & 4 deletions tests/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pytest

import disk_objectstore.exceptions as exc
from disk_objectstore import CompressMode, Container, ObjectType, models, utils
from disk_objectstore import CompressMode, Container, ObjectType, database, utils

COMPRESSION_ALGORITHMS_TO_TEST = ["zlib+1", "zlib+9"]

Expand Down Expand Up @@ -2260,9 +2260,9 @@ def test_validate_overlapping_packed(temp_container): # pylint: disable=invalid
assert not any(errors.values())

# Change the offset of the second object so that it's overlapping
temp_container._get_cached_session().query(models.Obj).filter(
models.Obj.hashkey == hashkey_second
).update({models.Obj.offset: models.Obj.offset - 1})
temp_container._get_cached_session().query(database.Obj).filter(
database.Obj.hashkey == hashkey_second
).update({database.Obj.offset: database.Obj.offset - 1})

errors = temp_container.validate()
problems = errors.pop("overlapping_packed")
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ extras =
dev
deps =
black
setenv =
SQLALCHEMY_WARN_20 = 1
commands = pytest {posargs}

0 comments on commit e966efd

Please sign in to comment.