diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d27939e..664de4f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,8 @@ jobs: # No need to run the benchmarks, they will run in a different workflow # Also, run in very verbose mode so if there is an error we get a complete diff run: pytest -vv --cov=disk_objectstore --benchmark-skip + env: + SQLALCHEMY_WARN_20: 1 - name: Create xml coverage run: coverage xml - name: Upload coverage to Codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f783c7f..d43ebd0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -60,7 +60,7 @@ repos: hooks: - id: pylint additional_dependencies: - - sqlalchemy<1.4 + - sqlalchemy==1.4.22 - click==8.0.1 - memory-profiler==0.58.0 - profilehooks==1.12.0 @@ -71,5 +71,7 @@ repos: rev: v0.910 hooks: - id: mypy - additional_dependencies: [typing-extensions] + additional_dependencies: + - "sqlalchemy[mypy]==1.4.22" + - typing-extensions files: ^(disk_objectstore/.*py)$ diff --git a/disk_objectstore/container.py b/disk_objectstore/container.py index b4ee8e0..00e04e2 100644 --- a/disk_objectstore/container.py +++ b/disk_objectstore/container.py @@ -31,13 +31,12 @@ # Python <3.8 backport from typing_extensions import Literal # type: ignore -from sqlalchemy import create_engine, event -from sqlalchemy.orm import sessionmaker from sqlalchemy.orm.session import Session from sqlalchemy.sql import func +from sqlalchemy.sql.expression import delete, select, text, update +from .database import Obj, get_session from .exceptions import InconsistentContent, NotExistent, NotInitialised -from .models import Base, Obj from .utils import ( CallbackStreamWrapper, LazyOpener, @@ -177,6 +176,18 @@ def _get_config_file(self) -> str: """Return the path to the container config file.""" return os.path.join(self._folder, "config.json") + @overload + def _get_session( + self, create: bool = False, raise_if_missing: Literal[True] = True + ) -> Session: + ... + + @overload + def _get_session( + self, create: bool = False, raise_if_missing: Literal[False] = False + ) -> Optional[Session]: + ... + def _get_session( self, create: bool = False, raise_if_missing: bool = False ) -> Optional[Session]: @@ -186,61 +197,11 @@ def _get_session( :param raise_if_missing: ignored if create==True. If create==False, and the index file is missing, either raise an exception (FileNotFoundError) if this flag is True, or return None """ - if not create and not os.path.exists(self._get_pack_index_path()): - if raise_if_missing: - raise FileNotFoundError("Pack index does not exist") - return None - - engine = create_engine(f"sqlite:///{self._get_pack_index_path()}") - - # For the next two bindings, see background on - # https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl - @event.listens_for(engine, "connect") - def do_connect( - dbapi_connection, connection_record - ): # pylint: disable=unused-argument,unused-variable - """Hook function that is called upon connection. - - It modifies the default behavior of SQLite to use WAL and to - go back to the 'default' isolation level mode. - """ - # disable pysqlite's emitting of the BEGIN statement entirely. - # also stops it from emitting COMMIT before any DDL. - dbapi_connection.isolation_level = None - # Open the file in WAL mode (see e.g. 
https://stackoverflow.com/questions/9671490) - # This allows to have as many readers as one wants, and a concurrent writer (up to one) - # Note that this writes on a journal, on a different packs.idx-wal, - # and also creates a packs.idx-shm file. - # Note also that when the session is created, you will keep reading from the same version, - # so you need to close and reload the session to see the newly written data. - # Docs on WAL: https://www.sqlite.org/wal.html - cursor = dbapi_connection.cursor() - cursor.execute("PRAGMA journal_mode=wal;") - cursor.close() - - # For this binding, see background on - # https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl - @event.listens_for(engine, "begin") - def do_begin(conn): # pylint: disable=unused-variable - # emit our own BEGIN - conn.execute("BEGIN") - - if create: - # Create all tables in the engine. This is equivalent to "Create Table" - # statements in raw SQL. - Base.metadata.create_all(engine) - - # Bind the engine to the metadata of the Base class so that the - # declaratives can be accessed through a DBSession instance - Base.metadata.bind = engine - - # We set autoflush = False to avoid to lock the DB if just doing queries/reads - DBSession = sessionmaker( # pylint: disable=invalid-name - bind=engine, autoflush=False, autocommit=False + return get_session( + self._get_pack_index_path(), + create=create, + raise_if_missing=raise_if_missing, ) - session = DBSession() - - return session def _get_cached_session(self) -> Session: """Return the SQLAlchemy session to access the SQLite file, @@ -630,26 +591,24 @@ def _get_objects_stream_meta_generator( # pylint: disable=too-many-branches,too # Operate in chunks, due to the SQLite limits # (see comment above the definition of self._IN_SQL_MAX_LENGTH) for chunk in chunk_iterator(hashkeys_set, size=self._IN_SQL_MAX_LENGTH): - query = ( - session.query(Obj) - .filter(Obj.hashkey.in_(chunk)) - .with_entities( - Obj.pack_id, - Obj.hashkey, - Obj.offset, - Obj.length, - Obj.compressed, - Obj.size, - ) - ) - for res in query: + stmt = select( + Obj.pack_id, + Obj.hashkey, + Obj.offset, + Obj.length, + Obj.compressed, + Obj.size, + ).where(Obj.hashkey.in_(chunk)) + for res in session.execute(stmt): packs[res[0]].append( ObjQueryResults(res[1], res[2], res[3], res[4], res[5]) ) else: sorted_hashkeys = sorted(hashkeys_set) pack_iterator = session.execute( - "SELECT pack_id, hashkey, offset, length, compressed, size FROM db_object ORDER BY hashkey" + text( + "SELECT pack_id, hashkey, offset, length, compressed, size FROM db_object ORDER BY hashkey" + ) ) # The left_key returns the second element of the tuple, i.e. 
the hashkey (that is the value to compare # with the right iterator) @@ -767,26 +726,24 @@ def _get_objects_stream_meta_generator( # pylint: disable=too-many-branches,too for chunk in chunk_iterator( loose_not_found, size=self._IN_SQL_MAX_LENGTH ): - query = ( - session.query(Obj) - .filter(Obj.hashkey.in_(chunk)) - .with_entities( - Obj.pack_id, - Obj.hashkey, - Obj.offset, - Obj.length, - Obj.compressed, - Obj.size, - ) - ) - for res in query: + stmt = select( + Obj.pack_id, + Obj.hashkey, + Obj.offset, + Obj.length, + Obj.compressed, + Obj.size, + ).where(Obj.hashkey.in_(chunk)) + for res in session.execute(stmt): packs[res[0]].append( ObjQueryResults(res[1], res[2], res[3], res[4], res[5]) ) else: sorted_hashkeys = sorted(loose_not_found) pack_iterator = session.execute( - "SELECT pack_id, hashkey, offset, length, compressed, size FROM db_object ORDER BY hashkey" + text( + "SELECT pack_id, hashkey, offset, length, compressed, size FROM db_object ORDER BY hashkey" + ) ) # The left_key returns the second element of the tuple, i.e. the hashkey (that is the value to compare # with the right iterator) @@ -1077,7 +1034,9 @@ def count_objects(self) -> Dict[str, int]: Also return a number of packs under 'pack_files'.""" retval = {} - number_packed = self._get_cached_session().query(Obj).count() + number_packed = self._get_cached_session().scalar( + select(func.count()).select_from(Obj) + ) retval["packed"] = number_packed retval["loose"] = sum(1 for _ in self._list_loose()) @@ -1137,12 +1096,12 @@ def get_total_size(self) -> Dict[str, int]: session = self._get_cached_session() # COALESCE is used to return 0 if there are no results, rather than None # SQL's COALESCE returns the first non-null result - retval["total_size_packed"] = session.query( - func.coalesce(func.sum(Obj.size), 0).label("total_size_packed") - ).all()[0][0] - retval["total_size_packed_on_disk"] = session.query( - func.coalesce(func.sum(Obj.length), 0).label("total_length_packed") - ).all()[0][0] + retval["total_size_packed"] = session.scalar( + select(func.coalesce(func.sum(Obj.size), 0).label("total_size_packed")) + ) + retval["total_size_packed_on_disk"] = session.scalar( + select(func.coalesce(func.sum(Obj.length), 0).label("total_length_packed")) + ) total_size_packfiles_on_disk = 0 for pack_id in list(self._list_packs()): @@ -1251,14 +1210,14 @@ def list_all_objects(self) -> Iterator[str]: # after this call was called. 
It would be bad instead to miss an object that has always existed last_pk = -1 while True: - results_chunk = ( - session.query(Obj) - .filter(Obj.id > last_pk) + + stmt = ( + select(Obj.id, Obj.hashkey) + .where(Obj.id > last_pk) .order_by(Obj.id) .limit(yield_per_size) - .with_entities(Obj.id, Obj.hashkey) - .all() ) + results_chunk = session.execute(stmt).all() for _, hashkey in results_chunk: # I need to use a comma because I want to create a tuple @@ -1365,17 +1324,13 @@ def pack_all_loose( # pylint: disable=too-many-locals,too-many-branches if len(loose_objects) <= self._MAX_CHUNK_ITERATE_LENGTH: for chunk in chunk_iterator(loose_objects, size=self._IN_SQL_MAX_LENGTH): # I check the hash keys that are already in the pack - for res in ( - session.query(Obj) - .filter(Obj.hashkey.in_(chunk)) - .with_entities(Obj.hashkey) - .all() - ): + stmt = select(Obj.hashkey).where(Obj.hashkey.in_(chunk)) + for res in session.execute(stmt): existing_packed_hashkeys.append(res[0]) else: sorted_hashkeys = sorted(loose_objects) pack_iterator = session.execute( - "SELECT hashkey FROM db_object ORDER BY hashkey" + text("SELECT hashkey FROM db_object ORDER BY hashkey") ) # The query returns a tuple of length 1, so I still need a left_key @@ -1597,7 +1552,7 @@ def add_streamed_objects_to_pack( # pylint: disable=too-many-locals, too-many-b if no_holes: if callback: - total = session.query(Obj).count() + total = session.scalar(select(func.count()).select_from(Obj)) if total: # If we have a callback, compute the total count of objects in this pack callback( @@ -1615,14 +1570,14 @@ def add_streamed_objects_to_pack( # pylint: disable=too-many-locals, too-many-b # As this is expensive, I will do it only if it is needed, i.e. when no_holes is True last_pk = -1 while True: - results_chunk = ( - session.query(Obj) - .filter(Obj.id > last_pk) + + stmt = ( + select(Obj.id, Obj.hashkey) + .where(Obj.id > last_pk) .order_by(Obj.id) .limit(yield_per_size) - .with_entities(Obj.id, Obj.hashkey) - .all() ) + results_chunk = session.execute(stmt).all() if not results_chunk: # No more packed objects @@ -1893,10 +1848,15 @@ def _vacuum(self) -> None: (See also description in issue #94). """ - engine = self._get_cached_session().get_bind() - engine.execute("VACUUM") + # VACUUM cannot be performed from within a transaction + # see: https://github.com/sqlalchemy/sqlalchemy/discussions/6959 + session = self._get_cached_session() + session.execute(text("COMMIT")) + session.execute(text("VACUUM")) + # ensure sqlalchemy knows to open a new transaction for the next execution + session.commit() - def clean_storage( # pylint: disable=too-many-branches + def clean_storage( # pylint: disable=too-many-branches,too-many-locals self, vacuum: bool = False ) -> None: """Perform some maintenance clean-up of the container. 
@@ -1991,17 +1951,13 @@ def clean_storage( # pylint: disable=too-many-branches if len(loose_objects) <= self._MAX_CHUNK_ITERATE_LENGTH: for chunk in chunk_iterator(loose_objects, size=self._IN_SQL_MAX_LENGTH): # I check the hash keys that are already in the pack - for res in ( - session.query(Obj) - .filter(Obj.hashkey.in_(chunk)) - .with_entities(Obj.hashkey) - .all() - ): - existing_packed_hashkeys.append(res[0]) + stmt = select(Obj.hashkey).where(Obj.hashkey.in_(chunk)) + for row in session.execute(stmt): + existing_packed_hashkeys.append(row[0]) else: sorted_hashkeys = sorted(loose_objects) pack_iterator = session.execute( - "SELECT hashkey FROM db_object ORDER BY hashkey" + text("SELECT hashkey FROM db_object ORDER BY hashkey") ) # The query returns a tuple of length 1, so I still need a left_key @@ -2085,7 +2041,7 @@ def import_objects( # pylint: disable=too-many-locals,too-many-statements,too-m # NOTE: I need to wrap in the `yield_first_element` iterator since it returns a list of lists sorted_packed = yield_first_element( self._get_cached_session().execute( - "SELECT hashkey FROM db_object ORDER BY hashkey" + text("SELECT hashkey FROM db_object ORDER BY hashkey") ) ) sorted_existing = merge_sorted(sorted_loose, sorted_packed) @@ -2349,7 +2305,9 @@ def callback(self, action, value): if callback: # If we have a callback, compute the total count of objects in this pack - total = session.query(Obj).filter(Obj.pack_id == pack_id).count() + total = session.scalar( + select(func.count()).select_from(Obj).where(Obj.pack_id == pack_id) + ) callback( action="init", value={"total": total, "description": f"Pack {pack_id}"}, @@ -2364,14 +2322,12 @@ def callback(self, action, value): pack_path = self._get_pack_path_from_pack_id(str(pack_id)) current_pos = 0 with open(pack_path, mode="rb") as pack_handle: - query = ( - session.query( - Obj.hashkey, Obj.size, Obj.offset, Obj.length, Obj.compressed - ) - .filter(Obj.pack_id == pack_id) + stmt = ( + select(Obj.hashkey, Obj.size, Obj.offset, Obj.length, Obj.compressed) + .where(Obj.pack_id == pack_id) .order_by(Obj.offset) ) - for hashkey, size, offset, length, compressed in query: + for hashkey, size, offset, length, compressed in session.execute(stmt): obj_reader: StreamSeekBytesType = PackedObjectReader( fhandle=pack_handle, offset=offset, length=length ) @@ -2440,7 +2396,7 @@ def validate( session = self._get_cached_session() all_pack_ids = sorted( - {res[0] for res in session.query(Obj).with_entities(Obj.pack_id).distinct()} + {res[0] for res in session.execute(select(Obj.pack_id).distinct())} ) for pack_id in all_pack_ids: @@ -2515,14 +2471,19 @@ def delete_objects(self, hashkeys: List[str]) -> List[Union[str, Any]]: # Operate in chunks, due to the SQLite limits # (see comment above the definition of self._IN_SQL_MAX_LENGTH) for chunk in chunk_iterator(hashkeys, size=self._IN_SQL_MAX_LENGTH): - query = session.query(Obj.hashkey).filter(Obj.hashkey.in_(chunk)) - deleted_this_chunk = [res[0] for res in query] + results = session.execute(select(Obj.hashkey).where(Obj.hashkey.in_(chunk))) + deleted_this_chunk = [res[0] for res in results] # I need to specify either `False` or `'fetch'` # otherwise one gets 'sqlalchemy.exc.InvalidRequestError: Could not evaluate current criteria in Python' # `'fetch'` will run the query twice so it's less efficient # False is beter but one needs to either `expire_all` at the end, or commit. # I will commit at the end. 
- query.delete(synchronize_session=False) + stmt = ( + delete(Obj) + .where(Obj.hashkey.in_(chunk)) + .execution_options(synchronize_session=False) + ) + session.execute(stmt) deleted_packed.update(deleted_this_chunk) session.commit() @@ -2579,9 +2540,10 @@ def repack_pack( ) session = self._get_cached_session() - one_object_in_pack = ( - session.query(Obj.id).filter(Obj.pack_id == pack_id).limit(1).all() - ) + + one_object_in_pack = session.execute( + select(Obj.id).where(Obj.pack_id == pack_id).limit(1) + ).all() if not one_object_in_pack: # No objects. Clean up the pack file, if it exists. if os.path.exists(self._get_pack_path_from_pack_id(pack_id)): @@ -2595,8 +2557,8 @@ def repack_pack( str(self._REPACK_PACK_ID), allow_repack_pack=True ) as write_pack_handle: with open(self._get_pack_path_from_pack_id(pack_id), "rb") as read_pack: - query = ( - session.query( + stmt = ( + select( Obj.id, Obj.hashkey, Obj.size, @@ -2604,10 +2566,12 @@ def repack_pack( Obj.length, Obj.compressed, ) - .filter(Obj.pack_id == pack_id) + .where(Obj.pack_id == pack_id) .order_by(Obj.offset) ) - for rowid, hashkey, size, offset, length, compressed in query: + for rowid, hashkey, size, offset, length, compressed in session.execute( + stmt + ): # Since I am assuming above that the method is `KEEP`, I will just transfer # the bytes. Otherwise I have to properly take into account compression in the # source and in the destination. @@ -2655,9 +2619,9 @@ def repack_pack( # Now we can safely delete the old object. I just check that there is no object still # refencing the old pack, to be sure. - one_object_in_pack = ( - session.query(Obj.id).filter(Obj.pack_id == pack_id).limit(1).all() - ) + one_object_in_pack = session.execute( + select(Obj.id).where(Obj.pack_id == pack_id).limit(1) + ).all() assert not one_object_in_pack, ( "I moved the objects of pack '{pack_id}' to pack '{repack_id}' " "but there are still references to pack '{pack_id}'!".format( @@ -2679,8 +2643,10 @@ def repack_pack( # Before deleting the source (pack -1) I need now to update again all # entries to point to the correct pack id - session.query(Obj).filter(Obj.pack_id == self._REPACK_PACK_ID).update( - {Obj.pack_id: pack_id} + session.execute( + update(Obj) + .where(Obj.pack_id == self._REPACK_PACK_ID) + .values(pack_id=pack_id) ) session.commit() diff --git a/disk_objectstore/database.py b/disk_objectstore/database.py new file mode 100644 index 0000000..73acc06 --- /dev/null +++ b/disk_objectstore/database.py @@ -0,0 +1,94 @@ +"""Models for the container index file (SQLite DB).""" +import os +from typing import Optional + +from sqlalchemy import Boolean, Column, Integer, String, create_engine, event +from sqlalchemy.orm import declarative_base, sessionmaker +from sqlalchemy.orm.session import Session +from sqlalchemy.sql.expression import text + +Base = declarative_base() # pylint: disable=invalid-name,useless-suppression + + +class Obj(Base): # pylint: disable=too-few-public-methods + """The main (and only) table to store object metadata (hashkey, offset, length, ...).""" + + __tablename__ = "db_object" + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + + # Important: there are parts of the code that rely on the fact that this field is unique. + # If you really do not want a uniqueness field, you will need to adapt the code. 
+ hashkey = Column(String, nullable=False, unique=True, index=True) + compressed = Column(Boolean, nullable=False) + size = Column( + Integer, nullable=False + ) # uncompressed size; if uncompressed, size == length + offset = Column(Integer, nullable=False) + length = Column(Integer, nullable=False) + pack_id = Column( + Integer, nullable=False + ) # integer ID of the pack in which this entry is stored + + +def get_session( + path: str, create: bool = False, raise_if_missing: bool = False +) -> Optional[Session]: + """Return a new session to connect to the pack-index SQLite DB. + + :param create: if True, creates the sqlite file and schema. + :param raise_if_missing: ignored if create==True. If create==False, and the index file + is missing, either raise an exception (FileNotFoundError) if this flag is True, or return None + """ + if not create and not os.path.exists(path): + if raise_if_missing: + raise FileNotFoundError("Pack index does not exist") + return None + + engine = create_engine(f"sqlite:///{path}", future=True) + + # For the next two bindings, see background on + # https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl + @event.listens_for(engine, "connect") + def do_connect(dbapi_connection, _): + """Hook function that is called upon connection. + + It modifies the default behavior of SQLite to use WAL and to + go back to the 'default' isolation level mode. + """ + # disable pysqlite's emitting of the BEGIN statement entirely. + # also stops it from emitting COMMIT before any DDL. + dbapi_connection.isolation_level = None + # Open the file in WAL mode (see e.g. https://stackoverflow.com/questions/9671490) + # This allows to have as many readers as one wants, and a concurrent writer (up to one) + # Note that this writes on a journal, on a different packs.idx-wal, + # and also creates a packs.idx-shm file. + # Note also that when the session is created, you will keep reading from the same version, + # so you need to close and reload the session to see the newly written data. + # Docs on WAL: https://www.sqlite.org/wal.html + cursor = dbapi_connection.cursor() + cursor.execute("PRAGMA journal_mode=wal;") + cursor.close() + + # For this binding, see background on + # https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl + @event.listens_for(engine, "begin") + def do_begin(conn): # pylint: disable=unused-variable + # emit our own BEGIN + conn.execute(text("BEGIN")) + + if create: + # Create all tables in the engine. This is equivalent to "Create Table" + # statements in raw SQL. 
+ Base.metadata.create_all(engine) + + # Bind the engine to the metadata of the Base class so that the + # declaratives can be accessed through a DBSession instance + Base.metadata.bind = engine + + # We set autoflush = False to avoid to lock the DB if just doing queries/reads + session = sessionmaker( + bind=engine, autoflush=False, autocommit=False, future=True + )() + + return session diff --git a/disk_objectstore/models.py b/disk_objectstore/models.py deleted file mode 100644 index f724b93..0000000 --- a/disk_objectstore/models.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Models for the container index file (SQLite DB).""" -from sqlalchemy import Boolean, Column, Integer, String -from sqlalchemy.ext.declarative import declarative_base - -Base = declarative_base() # pylint: disable=invalid-name,useless-suppression - - -class Obj(Base): # type: ignore # pylint: disable=too-few-public-methods - """The main (and only) table to store object metadata (hashkey, offset, length, ...).""" - - __tablename__ = "db_object" - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - - # Important: there are parts of the code that rely on the fact that this field is unique. - # If you really do not want a uniqueness field, you will need to adapt the code. - hashkey = Column(String, nullable=False, unique=True, index=True) - compressed = Column(Boolean, nullable=False) - size = Column( - Integer, nullable=False - ) # uncompressed size; if uncompressed, size == length - offset = Column(Integer, nullable=False) - length = Column(Integer, nullable=False) - pack_id = Column( - Integer, nullable=False - ) # integer ID of the pack in which this entry is stored diff --git a/disk_objectstore/utils.py b/disk_objectstore/utils.py index 0e972a6..aedcdf2 100644 --- a/disk_objectstore/utils.py +++ b/disk_objectstore/utils.py @@ -19,14 +19,13 @@ Iterable, Iterator, Optional, + Sequence, Tuple, Type, Union, ) from zlib import error -from sqlalchemy.engine.result import ResultProxy - from .exceptions import ClosingNotAllowed, ModificationNotAllowed try: @@ -1265,7 +1264,7 @@ def detect_where_sorted( # pylint: disable=too-many-branches, too-many-statemen now_left = new_now_left -def yield_first_element(iterator: Union[ResultProxy, zip]) -> Iterator[Union[str, int]]: +def yield_first_element(iterator: Iterable[Sequence]) -> Iterator[Any]: """Given an iterator that returns a tuple, return an iterator that yields only the first element of the tuple.""" for elem in iterator: yield elem[0] diff --git a/performance-benchmarks/validation-calls/performance.txt b/performance-benchmarks/validation-calls/performance.txt index 7974dcb..bd32bc6 100644 --- a/performance-benchmarks/validation-calls/performance.txt +++ b/performance-benchmarks/validation-calls/performance.txt @@ -3,7 +3,7 @@ from sqlalchemy import func from tqdm import tqdm -from disk_objectstore.models import Obj +from disk_objectstore.database import Obj from disk_objectstore import Container from disk_objectstore.utils import PackedObjectReader, StreamDecompresser, get_hash c = Container('test-newrepo-sdb/') diff --git a/pyproject.toml b/pyproject.toml index a1b0cf3..a5a3afa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,3 +32,4 @@ check_untyped_defs = true scripts_are_modules = true warn_unused_ignores = true warn_redundant_casts = true +plugins = ["sqlalchemy.ext.mypy.plugin"] diff --git a/requirements.lock b/requirements.lock index 16bc686..d4787bb 100644 --- a/requirements.lock +++ b/requirements.lock @@ -20,6 +20,8 @@ distlib==0.3.2 # via 
virtualenv filelock==3.0.12 # via virtualenv +greenlet==1.1.1 + # via sqlalchemy identify==2.2.13 # via pre-commit importlib-metadata==4.7.1 @@ -28,6 +30,7 @@ importlib-metadata==4.7.1 # pluggy # pre-commit # pytest + # sqlalchemy # virtualenv iniconfig==1.1.1 # via pytest @@ -68,7 +71,7 @@ pyyaml==5.4.1 # via pre-commit six==1.16.0 # via virtualenv -sqlalchemy==1.3.24 +sqlalchemy==1.4.23 # via disk-objectstore (setup.py) toml==0.10.2 # via diff --git a/setup.cfg b/setup.cfg index 3fc4a54..a101fb3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,7 +26,7 @@ keywords = object store, repository, file store [options] packages = find: install_requires = - sqlalchemy<1.4 + sqlalchemy~=1.4.22 typing-extensions;python_version < '3.8' python_requires = ~=3.7 include_package_data = True diff --git a/tests/concurrent_tests/periodic_worker.py b/tests/concurrent_tests/periodic_worker.py index 962826b..b65dbc8 100755 --- a/tests/concurrent_tests/periodic_worker.py +++ b/tests/concurrent_tests/periodic_worker.py @@ -20,9 +20,10 @@ import click import psutil +from sqlalchemy.sql.expression import select from disk_objectstore.container import Container, NotExistent, ObjectType -from disk_objectstore.models import Obj +from disk_objectstore.database import Obj MAX_RETRIES_NO_PERM = 1000 @@ -348,19 +349,15 @@ def main( session = ( container._get_cached_session() # pylint: disable=protected-access ) - query = ( - session.query(Obj) - .filter(Obj.hashkey == key) - .with_entities( - Obj.pack_id, - Obj.hashkey, - Obj.offset, - Obj.length, - Obj.compressed, - Obj.size, - ) - ) - print(list(query)) + stmt = select( + Obj.pack_id, + Obj.hashkey, + Obj.offset, + Obj.length, + Obj.compressed, + Obj.size, + ).where(Obj.hashkey == key) + print(list(session.execute(stmt))) raise diff --git a/tests/test_container.py b/tests/test_container.py index 0a167a6..a310384 100644 --- a/tests/test_container.py +++ b/tests/test_container.py @@ -14,7 +14,7 @@ import pytest import disk_objectstore.exceptions as exc -from disk_objectstore import CompressMode, Container, ObjectType, models, utils +from disk_objectstore import CompressMode, Container, ObjectType, database, utils COMPRESSION_ALGORITHMS_TO_TEST = ["zlib+1", "zlib+9"] @@ -2260,9 +2260,9 @@ def test_validate_overlapping_packed(temp_container): # pylint: disable=invalid assert not any(errors.values()) # Change the offset of the second object so that it's overlapping - temp_container._get_cached_session().query(models.Obj).filter( - models.Obj.hashkey == hashkey_second - ).update({models.Obj.offset: models.Obj.offset - 1}) + temp_container._get_cached_session().query(database.Obj).filter( + database.Obj.hashkey == hashkey_second + ).update({database.Obj.offset: database.Obj.offset - 1}) errors = temp_container.validate() problems = errors.pop("overlapping_packed") diff --git a/tox.ini b/tox.ini index e05fd5e..f80eeb8 100644 --- a/tox.ini +++ b/tox.ini @@ -16,4 +16,6 @@ extras = dev deps = black +setenv = + SQLALCHEMY_WARN_20 = 1 commands = pytest {posargs}
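
Editorial note (not part of the patch): the snippet below is a minimal sketch of the query-style migration this diff performs, assuming SQLAlchemy ~=1.4.22 with `future=True` engines and sessions as set up in the new disk_objectstore/database.py. Only `Base`, `Obj` and the 2.0-style constructs (`select`, `text`, `session.execute`, `session.scalar`) come from the patch itself; the in-memory SQLite URL, the example `pack_id == 0` filter and the print statements are illustrative placeholders.

    # Sketch: legacy 1.3-style ORM queries rewritten as the 2.0-style
    # statements used throughout this patch.
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker
    from sqlalchemy.sql import func
    from sqlalchemy.sql.expression import select, text

    from disk_objectstore.database import Base, Obj

    engine = create_engine("sqlite:///:memory:", future=True)
    Base.metadata.create_all(engine)
    session = sessionmaker(
        bind=engine, autoflush=False, autocommit=False, future=True
    )()

    # 1.3: session.query(Obj).count()
    # 1.4: scalar count built with select()
    total = session.scalar(select(func.count()).select_from(Obj))

    # 1.3: session.query(Obj).filter(...).with_entities(...)
    # 1.4: select(columns).where(...), executed on the session
    stmt = (
        select(Obj.hashkey, Obj.offset, Obj.length)
        .where(Obj.pack_id == 0)
        .order_by(Obj.offset)
    )
    for hashkey, offset, length in session.execute(stmt):
        print(hashkey, offset, length)

    # Raw SQL strings must now be wrapped in text() before session.execute()
    for (hashkey,) in session.execute(
        text("SELECT hashkey FROM db_object ORDER BY hashkey")
    ):
        print(hashkey)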