diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aa07afaf39..541518a9c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -103,6 +103,7 @@ repos: aiida/storage/psql_dos/backend.py| aiida/storage/psql_dos/orm/querybuilder/.*py| aiida/storage/psql_dos/utils.py| + aiida/storage/sqlite_zip/.*.py| aiida/tools/graph/graph_traversers.py| aiida/tools/groups/paths.py| aiida/tools/archive/.*py| diff --git a/aiida/cmdline/commands/cmd_archive.py b/aiida/cmdline/commands/cmd_archive.py index 2fdb40f933..94728e3012 100644 --- a/aiida/cmdline/commands/cmd_archive.py +++ b/aiida/cmdline/commands/cmd_archive.py @@ -11,18 +11,19 @@ """`verdi archive` command.""" from enum import Enum import logging +from pathlib import Path import traceback from typing import List, Tuple import urllib.request import click from click_spinner import spinner -import tabulate from aiida.cmdline.commands.cmd_verdi import verdi from aiida.cmdline.params import arguments, options from aiida.cmdline.params.types import GroupParamType, PathOrUrl from aiida.cmdline.utils import decorators, echo +from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, UnreachableStorage from aiida.common.links import GraphTraversalRules from aiida.common.log import AIIDA_LOGGER @@ -36,66 +37,68 @@ def verdi_archive(): """Create, inspect and import AiiDA archives.""" -@verdi_archive.command('inspect') +@verdi_archive.command('version') +@click.argument('path', nargs=1, type=click.Path(exists=True, readable=True)) +def archive_version(path): + """Print the current version of an archive's schema.""" + # note: this mirrors `cmd_storage:storage_version` + # it is currently hardcoded to the `SqliteZipBackend`, but could be generalized in the future + from aiida.storage.sqlite_zip.backend import SqliteZipBackend + storage_cls = SqliteZipBackend + profile = storage_cls.create_profile(path) + head_version = storage_cls.version_head() + try: + profile_version = storage_cls.version_profile(profile) + except (UnreachableStorage, CorruptStorage) as exc: + echo.echo_critical(f'archive file version unreadable: {exc}') + echo.echo(f'Latest archive schema version: {head_version!r}') + echo.echo(f'Archive schema version of {Path(path).name!r}: {profile_version!r}') + + +@verdi_archive.command('info') +@click.argument('path', nargs=1, type=click.Path(exists=True, readable=True)) +@click.option('--statistics', is_flag=True, help='Provides more in-detail statistically relevant data.') +def archive_info(path, statistics): + """Summarise the contents of an archive.""" + # note: this mirrors `cmd_storage:storage_info` + # it is currently hardcoded to the `SqliteZipBackend`, but could be generalized in the future + from aiida.storage.sqlite_zip.backend import SqliteZipBackend + try: + storage = SqliteZipBackend(SqliteZipBackend.create_profile(path)) + except (UnreachableStorage, CorruptStorage) as exc: + echo.echo_critical(f'archive file unreadable: {exc}') + except IncompatibleStorageSchema as exc: + echo.echo_critical(f'archive version incompatible: {exc}') + with spinner(): + try: + data = storage.get_info(statistics=statistics) + finally: + storage.close() + + echo.echo_dictionary(data, sort_keys=False, fmt='yaml') + + +@verdi_archive.command('inspect', hidden=True) @click.argument('archive', nargs=1, type=click.Path(exists=True, readable=True)) @click.option('-v', '--version', is_flag=True, help='Print the archive format version and exit.') @click.option('-m', '--meta-data', is_flag=True, help='Print the 
meta data contents and exit.')
 @click.option('-d', '--database', is_flag=True, help='Include information on entities in the database.')
-def inspect(archive, version, meta_data, database):
+@decorators.deprecated_command(
+    'This command has been deprecated and will be removed soon. '
+    'Please call `verdi archive version` or `verdi archive info` instead.\n'
+)
+@click.pass_context
+def inspect(ctx, archive, version, meta_data, database):  # pylint: disable=unused-argument
     """Inspect contents of an archive without importing it.

-    By default a summary of the archive contents will be printed.
-    The various options can be used to change exactly what information is displayed.
+    .. deprecated:: v2.0.0, use `verdi archive version` or `verdi archive info` instead.
     """
-    from aiida.tools.archive.abstract import get_format
-    from aiida.tools.archive.exceptions import UnreadableArchiveError
-
-    archive_format = get_format()
-    latest_version = archive_format.latest_version
-    try:
-        current_version = archive_format.read_version(archive)
-    except UnreadableArchiveError as exc:
-        echo.echo_critical(f'archive file of unknown format: {exc}')
-    if version:
-        echo.echo(current_version)
-        return
-
-    if current_version != latest_version:
-        echo.echo_critical(
-            f"Archive version is not the latest: '{current_version}' != '{latest_version}'. "
-            'Use `verdi migrate` to upgrade to the latest version'
-        )
-
-    with archive_format.open(archive, 'r') as archive_reader:
-        metadata = archive_reader.get_metadata()
-
-    if meta_data:
-        echo.echo_dictionary(metadata, sort_keys=False)
-        return
-
-    statistics = {
-        name: metadata[key] for key, name in [
-            ['export_version', 'Version archive'],
-            ['aiida_version', 'Version aiida'],
-            ['compression', 'Compression'],
-            ['ctime', 'Created'],
-            ['mtime', 'Modified'],
-        ] if key in metadata
-    }
-    if 'conversion_info' in metadata:
-        statistics['Conversion info'] = '\n'.join(metadata['conversion_info'])
-
-    echo.echo(tabulate.tabulate(statistics.items()))
-
-    if database:
-        echo.echo('')
-        echo.echo('Database statistics')
-        echo.echo('-------------------')
-        with spinner():
-            with archive_format.open(archive, 'r') as archive_reader:
-                data = archive_reader.get_backend().get_info(statistics=True)
-        echo.echo_dictionary(data, sort_keys=False, fmt='yaml')
+    if version:
+        ctx.invoke(archive_version, path=archive)
+    elif database:
+        ctx.invoke(archive_info, path=archive, statistics=True)
+    else:
+        ctx.invoke(archive_info, path=archive, statistics=False)


 @verdi_archive.command('create')
@@ -136,7 +139,7 @@ def create(
     create_backward, return_backward, call_calc_backward, call_work_backward, include_comments, include_logs,
     include_authinfos, compress, batch_size, test_run
 ):
-    """Write subsets of the provenance graph to a single file.
+    """Create an archive from all or part of a profile's data.

     Besides Nodes of the provenance graph, you can archive Groups, Codes, Computers, Comments and Logs.
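For reference, the two new commands introduced above are thin wrappers around the `SqliteZipBackend` API added later in this change. A minimal sketch of the programmatic equivalent, assuming an archive file at a placeholder path `export.aiida`::

    from aiida.storage.sqlite_zip.backend import SqliteZipBackend

    # build a throwaway profile pointing at the archive file (placeholder path)
    profile = SqliteZipBackend.create_profile('export.aiida')

    # `verdi archive version`: compare the archive schema version to the latest supported one
    print(SqliteZipBackend.version_head(), SqliteZipBackend.version_profile(profile))

    # `verdi archive info`: open the backend read-only and summarise its contents
    backend = SqliteZipBackend(profile)
    try:
        print(backend.get_info(statistics=True))
    finally:
        backend.close()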
@@ -214,7 +217,7 @@ def create( help='Archive format version to migrate to (defaults to latest version).', ) def migrate(input_file, output_file, force, in_place, version): - """Migrate an export archive to a more recent format version.""" + """Migrate an archive to a more recent schema version.""" from aiida.common.progress_reporter import set_progress_bar_tqdm, set_progress_reporter from aiida.tools.archive.abstract import get_format @@ -248,7 +251,7 @@ def migrate(input_file, output_file, force, in_place, version): f'{error.__class__.__name__}:{error}' ) - echo.echo_success(f'migrated the archive to version {version}') + echo.echo_success(f'migrated the archive to version {version!r}') class ExtrasImportCode(Enum): @@ -333,7 +336,7 @@ def import_archive( ctx, archives, webpages, extras_mode_existing, extras_mode_new, comment_mode, include_authinfos, migration, batch_size, import_group, group, test_run ): - """Import data from an AiiDA archive file. + """Import archived data to a profile. The archive can be specified by its relative or absolute file path, or its HTTP URL. """ @@ -424,12 +427,11 @@ def _import_archive_and_migrate(archive: str, web_based: bool, import_kwargs: di :param archive: the path or URL to the archive :param web_based: If the archive needs to be downloaded first :param import_kwargs: keyword arguments to pass to the import function - :param try_migration: whether to try a migration if the import raises IncompatibleArchiveVersionError + :param try_migration: whether to try a migration if the import raises `IncompatibleStorageSchema` """ from aiida.common.folders import SandboxFolder from aiida.tools.archive.abstract import get_format - from aiida.tools.archive.exceptions import IncompatibleArchiveVersionError from aiida.tools.archive.imports import import_archive as _import_archive archive_format = get_format() @@ -452,7 +454,7 @@ def _import_archive_and_migrate(archive: str, web_based: bool, import_kwargs: di echo.echo_report(f'starting import: {archive}') try: _import_archive(archive_path, archive_format=archive_format, **import_kwargs) - except IncompatibleArchiveVersionError as exception: + except IncompatibleStorageSchema as exception: if try_migration: echo.echo_report(f'incompatible version detected for {archive}, trying migration') diff --git a/aiida/manage/configuration/profile.py b/aiida/manage/configuration/profile.py index fc5e9d96b4..a808efc668 100644 --- a/aiida/manage/configuration/profile.py +++ b/aiida/manage/configuration/profile.py @@ -127,9 +127,9 @@ def storage_cls(self) -> Type['StorageBackend']: if self.storage_backend == 'psql_dos': from aiida.storage.psql_dos.backend import PsqlDosBackend return PsqlDosBackend - if self.storage_backend == 'archive.sqlite': - from aiida.tools.archive.implementations.sqlite.backend import ArchiveReadOnlyBackend - return ArchiveReadOnlyBackend + if self.storage_backend == 'sqlite_zip': + from aiida.storage.sqlite_zip.backend import SqliteZipBackend + return SqliteZipBackend raise ValueError(f'unknown storage backend type: {self.storage_backend}') @property diff --git a/aiida/storage/log.py b/aiida/storage/log.py index 11ef376b36..24a037f442 100644 --- a/aiida/storage/log.py +++ b/aiida/storage/log.py @@ -12,3 +12,4 @@ from aiida.common.log import AIIDA_LOGGER STORAGE_LOGGER = AIIDA_LOGGER.getChild('storage') +MIGRATE_LOGGER = STORAGE_LOGGER.getChild('migrate') diff --git a/aiida/storage/psql_dos/__init__.py b/aiida/storage/psql_dos/__init__.py index eac0048fe9..8bea8e1e03 100644 --- 
a/aiida/storage/psql_dos/__init__.py +++ b/aiida/storage/psql_dos/__init__.py @@ -7,7 +7,7 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Module with implementation of the storage backend using SqlAlchemy and the disk-objectstore.""" +"""Module with implementation of the storage backend using PostGreSQL and the disk-objectstore.""" # AUTO-GENERATED diff --git a/aiida/storage/psql_dos/backend.py b/aiida/storage/psql_dos/backend.py index 7f0fe3d59f..683484845a 100644 --- a/aiida/storage/psql_dos/backend.py +++ b/aiida/storage/psql_dos/backend.py @@ -55,7 +55,7 @@ def version_head(cls) -> str: return cls.migrator.get_schema_version_head() @classmethod - def version_profile(cls, profile: Profile) -> None: + def version_profile(cls, profile: Profile) -> Optional[str]: return cls.migrator(profile).get_schema_version_profile(check_legacy=True) @classmethod diff --git a/aiida/storage/psql_dos/migrations/env.py b/aiida/storage/psql_dos/migrations/env.py index 613d237c34..aacf26e98d 100644 --- a/aiida/storage/psql_dos/migrations/env.py +++ b/aiida/storage/psql_dos/migrations/env.py @@ -16,18 +16,8 @@ def run_migrations_online(): The connection should have been passed to the config, which we use to configue the migration context. """ + from aiida.storage.psql_dos.models.base import get_orm_metadata - # pylint: disable=unused-import - from aiida.common.exceptions import DbContentError - from aiida.storage.psql_dos.models.authinfo import DbAuthInfo - from aiida.storage.psql_dos.models.base import Base - from aiida.storage.psql_dos.models.comment import DbComment - from aiida.storage.psql_dos.models.computer import DbComputer - from aiida.storage.psql_dos.models.group import DbGroup - from aiida.storage.psql_dos.models.log import DbLog - from aiida.storage.psql_dos.models.node import DbLink, DbNode - from aiida.storage.psql_dos.models.settings import DbSetting - from aiida.storage.psql_dos.models.user import DbUser config = context.config # pylint: disable=no-member connection = config.attributes.get('connection', None) @@ -43,7 +33,7 @@ def run_migrations_online(): context.configure( # pylint: disable=no-member connection=connection, - target_metadata=Base.metadata, + target_metadata=get_orm_metadata(), transaction_per_migration=True, aiida_profile=aiida_profile, on_version_apply=on_version_apply diff --git a/aiida/storage/psql_dos/migrator.py b/aiida/storage/psql_dos/migrator.py index ef97683548..fc85d30bf8 100644 --- a/aiida/storage/psql_dos/migrator.py +++ b/aiida/storage/psql_dos/migrator.py @@ -33,6 +33,7 @@ from aiida.common import exceptions from aiida.manage.configuration.profile import Profile +from aiida.storage.log import MIGRATE_LOGGER from aiida.storage.psql_dos.models.settings import DbSetting from aiida.storage.psql_dos.utils import create_sqlalchemy_engine @@ -197,8 +198,6 @@ def migrate(self) -> None: :raises: :class:`~aiida.common.exceptions.UnreachableStorage` if the storage cannot be accessed """ - from aiida.cmdline.utils import echo - # the database can be in one of a few states: # 1. Completely empty -> we can simply initialise it with the current ORM schema # 2. 
Legacy django database -> we transfer the version to alembic, migrate to the head of the django branch, @@ -211,7 +210,7 @@ def migrate(self) -> None: if not inspect(connection).has_table(self.alembic_version_tbl_name): if not inspect(connection).has_table(self.django_version_table.name): # the database is assumed to be empty, so we need to initialise it - echo.echo_report('initialising empty storage schema') + MIGRATE_LOGGER.report('initialising empty storage schema') self.initialise() return # the database is a legacy django one, @@ -238,10 +237,10 @@ def migrate(self) -> None: if 'django' in branches or 'sqlalchemy' in branches: # migrate up to the top of the respective legacy branches if 'django' in branches: - echo.echo_report('Migrating to the head of the legacy django branch') + MIGRATE_LOGGER.report('Migrating to the head of the legacy django branch') self.migrate_up('django@head') elif 'sqlalchemy' in branches: - echo.echo_report('Migrating to the head of the legacy sqlalchemy branch') + MIGRATE_LOGGER.report('Migrating to the head of the legacy sqlalchemy branch') self.migrate_up('sqlalchemy@head') # now re-stamp with the comparable revision on the main branch with self._connection_context() as connection: @@ -251,7 +250,7 @@ def migrate(self) -> None: connection.commit() # finally migrate to the main head revision - echo.echo_report('Migrating to the head of the main branch') + MIGRATE_LOGGER.report('Migrating to the head of the main branch') self.migrate_up('main@head') def migrate_up(self, version: str) -> None: @@ -284,7 +283,7 @@ def _alembic_script(cls): return ScriptDirectory.from_config(cls._alembic_config()) @contextlib.contextmanager - def _alembic_connect(self, _connection: Optional[Connection] = None): + def _alembic_connect(self, _connection: Optional[Connection] = None) -> Iterator[Config]: """Context manager to return an instance of an Alembic configuration. The profiles's database connection is added in the `attributes` property, through which it can then also be @@ -297,16 +296,15 @@ def _alembic_connect(self, _connection: Optional[Connection] = None): def _callback(step: MigrationInfo, **kwargs): # pylint: disable=unused-argument """Callback to be called after a migration step is executed.""" - from aiida.cmdline.utils import echo from_rev = step.down_revision_ids[0] if step.down_revision_ids else '' - echo.echo_report(f'- {from_rev} -> {step.up_revision_id}') + MIGRATE_LOGGER.report(f'- {from_rev} -> {step.up_revision_id}') config.attributes['on_version_apply'] = _callback # pylint: disable=unsupported-assignment-operation yield config @contextlib.contextmanager - def _migration_context(self, _connection: Optional[Connection] = None) -> MigrationContext: + def _migration_context(self, _connection: Optional[Connection] = None) -> Iterator[MigrationContext]: """Context manager to return an instance of an Alembic migration context. This migration context will have been configured with the current database connection, which allows this context diff --git a/aiida/storage/sqlite_zip/__init__.py b/aiida/storage/sqlite_zip/__init__.py new file mode 100644 index 0000000000..d79b5e11c6 --- /dev/null +++ b/aiida/storage/sqlite_zip/__init__.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Module with implementation of the storage backend, +using an SQLite database and repository files, within a zipfile. + +The content of the zip file is:: + + |- storage.zip + |- metadata.json + |- db.sqlite3 + |- repo/ + |- hashkey1 + |- hashkey2 + ... + +For quick access, the metadata (such as the version) is stored in a `metadata.json` file, +at the "top" of the zip file, with the sqlite database, just below it, then the repository files. +Repository files are named by their SHA256 content hash. + +This storage method is primarily intended for the AiiDA archive, +as a read-only storage method. +This is because sqlite and zip are not suitable for concurrent write access. + +The archive format originally used a JSON file to store the database, +and these revisions are handled by the `version_profile` and `migrate` backend methods. +""" diff --git a/aiida/storage/sqlite_zip/backend.py b/aiida/storage/sqlite_zip/backend.py new file mode 100644 index 0000000000..ff931cdb9d --- /dev/null +++ b/aiida/storage/sqlite_zip/backend.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""The table models are dynamically generated from the sqlalchemy backend models.""" +from __future__ import annotations + +from contextlib import contextmanager +from functools import singledispatch +from pathlib import Path +import tempfile +from typing import BinaryIO, Iterable, Iterator, Optional, Sequence, Tuple, Type, cast +from zipfile import ZipFile, is_zipfile + +from archive_path import extract_file_in_zip +from sqlalchemy.orm import Session + +from aiida.common.exceptions import AiidaException, ClosedStorage, CorruptStorage +from aiida.manage import Profile +from aiida.orm.entities import EntityTypes +from aiida.orm.implementation import StorageBackend +from aiida.repository.backend.abstract import AbstractRepositoryBackend +from aiida.storage.psql_dos.orm import authinfos, comments, computers, entities, groups, logs, nodes, users +from aiida.storage.psql_dos.orm.querybuilder import SqlaQueryBuilder +from aiida.storage.psql_dos.orm.utils import ModelWrapper + +from . import models +from .migrator import get_schema_version_head, validate_storage +from .utils import DB_FILENAME, REPO_FOLDER, create_sqla_engine, extract_metadata, read_version + + +class SqliteZipBackend(StorageBackend): # pylint: disable=too-many-public-methods + """A read-only backend for a sqlite/zip format. + + The storage format uses an SQLite database and repository files, within a folder or zipfile. + + The content of the folder/zipfile should be:: + + |- metadata.json + |- db.sqlite3 + |- repo/ + |- hashkey1 + |- hashkey2 + ... 
+ + """ + + @classmethod + def version_head(cls) -> str: + return get_schema_version_head() + + @staticmethod + def create_profile(path: str | Path) -> Profile: + """Create a new profile instance for this backend, from the path to the zip file.""" + profile_name = Path(path).name + return Profile( + profile_name, { + 'storage': { + 'backend': 'sqlite_zip', + 'config': { + 'path': str(path) + } + }, + 'process_control': { + 'backend': 'null', + 'config': {} + } + } + ) + + @classmethod + def version_profile(cls, profile: Profile) -> Optional[str]: + return read_version(profile.storage_config['path'], search_limit=None) + + @classmethod + def migrate(cls, profile: Profile): + raise NotImplementedError('use the migrate function directly.') + + def __init__(self, profile: Profile): + super().__init__(profile) + self._path = Path(profile.storage_config['path']) + validate_storage(self._path) + # lazy open the archive zipfile and extract the database file + self._db_file: Optional[Path] = None + self._session: Optional[Session] = None + self._repo: Optional[_RoBackendRepository] = None + self._closed = False + + def __str__(self) -> str: + state = 'closed' if self.is_closed else 'open' + return f'SqliteZip storage (read-only) [{state}] @ {self._path}' + + @property + def is_closed(self) -> bool: + return self._closed + + def close(self): + """Close the backend""" + if self._session: + self._session.close() + if self._db_file and self._db_file.exists(): + self._db_file.unlink() + if self._repo: + self._repo.close() + self._session = None + self._db_file = None + self._repo = None + self._closed = True + + def get_session(self) -> Session: + """Return an SQLAlchemy session.""" + if self._closed: + raise ClosedStorage(str(self)) + if self._session is None: + if is_zipfile(self._path): + _, path = tempfile.mkstemp() + db_file = self._db_file = Path(path) + with db_file.open('wb') as handle: + try: + extract_file_in_zip(self._path, DB_FILENAME, handle, search_limit=4) + except Exception as exc: + raise CorruptStorage(f'database could not be read: {exc}') from exc + else: + db_file = self._path / DB_FILENAME + if not db_file.exists(): + raise CorruptStorage(f'database could not be read: non-existent {db_file}') + self._session = Session(create_sqla_engine(db_file)) + return self._session + + def get_repository(self) -> '_RoBackendRepository': + if self._closed: + raise ClosedStorage(str(self)) + if self._repo is None: + if is_zipfile(self._path): + self._repo = ZipfileBackendRepository(self._path) + elif (self._path / REPO_FOLDER).exists(): + self._repo = FolderBackendRepository(self._path / REPO_FOLDER) + else: + raise CorruptStorage(f'repository could not be read: non-existent {self._path / REPO_FOLDER}') + return self._repo + + def query(self) -> 'SqliteBackendQueryBuilder': + return SqliteBackendQueryBuilder(self) + + def get_backend_entity(self, res): # pylint: disable=no-self-use + """Return the backend entity that corresponds to the given Model instance.""" + klass = get_backend_entity(res) + return klass(self, res) + + @property + def authinfos(self): + return create_backend_collection( + authinfos.SqlaAuthInfoCollection, self, authinfos.SqlaAuthInfo, models.DbAuthInfo + ) + + @property + def comments(self): + return create_backend_collection(comments.SqlaCommentCollection, self, comments.SqlaComment, models.DbComment) + + @property + def computers(self): + return create_backend_collection( + computers.SqlaComputerCollection, self, computers.SqlaComputer, models.DbComputer + ) + + @property + 
def groups(self): + return create_backend_collection(groups.SqlaGroupCollection, self, groups.SqlaGroup, models.DbGroup) + + @property + def logs(self): + return create_backend_collection(logs.SqlaLogCollection, self, logs.SqlaLog, models.DbLog) + + @property + def nodes(self): + return create_backend_collection(nodes.SqlaNodeCollection, self, nodes.SqlaNode, models.DbNode) + + @property + def users(self): + return create_backend_collection(users.SqlaUserCollection, self, users.SqlaUser, models.DbUser) + + def _clear(self, recreate_user: bool = True) -> None: + raise ReadOnlyError() + + def transaction(self): + raise ReadOnlyError() + + @property + def in_transaction(self) -> bool: + return False + + def bulk_insert(self, entity_type: EntityTypes, rows: list[dict], allow_defaults: bool = False) -> list[int]: + raise ReadOnlyError() + + def bulk_update(self, entity_type: EntityTypes, rows: list[dict]) -> None: + raise ReadOnlyError() + + def delete_nodes_and_connections(self, pks_to_delete: Sequence[int]): + raise ReadOnlyError() + + def get_global_variable(self, key: str): + raise NotImplementedError + + def set_global_variable(self, key: str, value, description: Optional[str] = None, overwrite=True) -> None: + raise ReadOnlyError() + + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: + raise NotImplementedError + + def get_info(self, statistics: bool = False) -> dict: + # since extracting the database file is expensive, we only do it if statistics is True + results = {'metadata': extract_metadata(self._path)} + if statistics: + results.update(super().get_info(statistics=statistics)) + results['repository'] = self.get_repository().get_info(statistics) + return results + + +class ReadOnlyError(AiidaException): + """Raised when a write operation is called on a read-only archive.""" + + def __init__(self, msg='sqlite_zip storage is read-only'): # pylint: disable=useless-super-delegation + super().__init__(msg) + + +class _RoBackendRepository(AbstractRepositoryBackend): # pylint: disable=abstract-method + """A backend abstract for a read-only folder or zip file.""" + + def __init__(self, path: str | Path): + """Initialise the repository backend. + + :param path: the path to the zip file + """ + self._path = Path(path) + self._closed = False + + def close(self) -> None: + """Close the repository.""" + self._closed = True + + @property + def uuid(self) -> Optional[str]: + return None + + @property + def key_format(self) -> Optional[str]: + return 'sha256' + + def initialise(self, **kwargs) -> None: + pass + + @property + def is_initialised(self) -> bool: + return True + + def erase(self) -> None: + raise ReadOnlyError() + + def _put_object_from_filelike(self, handle: BinaryIO) -> str: + raise ReadOnlyError() + + def has_objects(self, keys: list[str]) -> list[bool]: + return [self.has_object(key) for key in keys] + + def iter_object_streams(self, keys: list[str]) -> Iterator[Tuple[str, BinaryIO]]: + for key in keys: + with self.open(key) as handle: # pylint: disable=not-context-manager + yield key, handle + + def delete_objects(self, keys: list[str]) -> None: + raise ReadOnlyError() + + def get_object_hash(self, key: str) -> str: + return key + + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: + pass + + def get_info(self, statistics: bool = False, **kwargs) -> dict: + return {'objects': {'count': len(list(self.list_objects()))}} + + +class ZipfileBackendRepository(_RoBackendRepository): + """A read-only backend for a zip file. 
+ + The zip file should contain repository files with the key format: ``/``, + i.e. files named by the sha256 hash of the file contents, inside a ```` directory. + """ + + def __init__(self, path: str | Path): + super().__init__(path) + self._folder = REPO_FOLDER + self.__zipfile: None | ZipFile = None + + def close(self) -> None: + if self._zipfile: + self._zipfile.close() + super().close() + + @property + def _zipfile(self) -> ZipFile: + """Return the open zip file.""" + if self._closed: + raise ClosedStorage(f'repository is closed: {self._path}') + if self.__zipfile is None: + try: + self.__zipfile = ZipFile(self._path, mode='r') # pylint: disable=consider-using-with + except Exception as exc: + raise CorruptStorage(f'repository could not be read {self._path}: {exc}') from exc + return self.__zipfile + + def has_object(self, key: str) -> bool: + try: + self._zipfile.getinfo(f'{self._folder}/{key}') + except KeyError: + return False + return True + + def list_objects(self) -> Iterable[str]: + prefix = f'{self._folder}/' + prefix_len = len(prefix) + for name in self._zipfile.namelist(): + if name.startswith(prefix) and name[prefix_len:]: + yield name[prefix_len:] + + @contextmanager + def open(self, key: str) -> Iterator[BinaryIO]: + try: + handle = self._zipfile.open(f'{self._folder}/{key}') + yield cast(BinaryIO, handle) + except KeyError: + raise FileNotFoundError(f'object with key `{key}` does not exist.') + finally: + handle.close() + + +class FolderBackendRepository(_RoBackendRepository): + """A read-only backend for a folder. + + The folder should contain repository files, named by the sha256 hash of the file contents. + """ + + def has_object(self, key: str) -> bool: + return self._path.joinpath(key).is_file() + + def list_objects(self) -> Iterable[str]: + for subpath in self._path.iterdir(): + if subpath.is_file(): + yield subpath.name + + @contextmanager + def open(self, key: str) -> Iterator[BinaryIO]: + if not self._path.joinpath(key).is_file(): + raise FileNotFoundError(f'object with key `{key}` does not exist.') + with self._path.joinpath(key).open('rb') as handle: + yield handle + + +class SqliteBackendQueryBuilder(SqlaQueryBuilder): + """Archive query builder""" + + @property + def Node(self): + return models.DbNode + + @property + def Link(self): + return models.DbLink + + @property + def Computer(self): + return models.DbComputer + + @property + def User(self): + return models.DbUser + + @property + def Group(self): + return models.DbGroup + + @property + def AuthInfo(self): + return models.DbAuthInfo + + @property + def Comment(self): + return models.DbComment + + @property + def Log(self): + return models.DbLog + + @property + def table_groups_nodes(self): + return models.DbGroupNodes.__table__ # type: ignore[attr-defined] # pylint: disable=no-member + + +def create_backend_cls(base_class, model_cls): + """Create an archive backend class for the given model class.""" + + class ReadOnlyEntityBackend(base_class): # type: ignore + """Backend class for the read-only archive.""" + + MODEL_CLASS = model_cls + + def __init__(self, _backend, model): + """Initialise the backend entity.""" + self._backend = _backend + self._model = ModelWrapper(model, _backend) + + @property + def model(self) -> ModelWrapper: + """Return an ORM model that correctly updates and flushes the data model when getting or setting a field.""" + return self._model + + @property + def bare_model(self): + """Return the underlying SQLAlchemy ORM model for this entity.""" + return self.model._model # pylint: 
disable=protected-access + + @classmethod + def from_dbmodel(cls, model, _backend): + return cls(_backend, model) + + @property + def is_stored(self): + return True + + def store(self): # pylint: disable=no-self-use + raise ReadOnlyError() + + return ReadOnlyEntityBackend + + +def create_backend_collection(cls, _backend, entity_cls, model): + collection = cls(_backend) + new_cls = create_backend_cls(entity_cls, model) + collection.ENTITY_CLASS = new_cls + return collection + + +@singledispatch +def get_backend_entity(dbmodel) -> Type[entities.SqlaModelEntity]: # pylint: disable=unused-argument + raise TypeError(f'Cannot get backend entity for {dbmodel}') + + +@get_backend_entity.register(models.DbAuthInfo) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(authinfos.SqlaAuthInfo, dbmodel.__class__) + + +@get_backend_entity.register(models.DbComment) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(comments.SqlaComment, dbmodel.__class__) + + +@get_backend_entity.register(models.DbComputer) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(computers.SqlaComputer, dbmodel.__class__) + + +@get_backend_entity.register(models.DbGroup) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(groups.SqlaGroup, dbmodel.__class__) + + +@get_backend_entity.register(models.DbLog) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(logs.SqlaLog, dbmodel.__class__) + + +@get_backend_entity.register(models.DbNode) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(nodes.SqlaNode, dbmodel.__class__) + + +@get_backend_entity.register(models.DbUser) # type: ignore[call-overload] +def _(dbmodel): + return create_backend_cls(users.SqlaUser, dbmodel.__class__) diff --git a/aiida/tools/archive/implementations/sqlite/migrations/__init__.py b/aiida/storage/sqlite_zip/migrations/__init__.py similarity index 90% rename from aiida/tools/archive/implementations/sqlite/migrations/__init__.py rename to aiida/storage/sqlite_zip/migrations/__init__.py index 84dbe1264d..2776a55f97 100644 --- a/aiida/tools/archive/implementations/sqlite/migrations/__init__.py +++ b/aiida/storage/sqlite_zip/migrations/__init__.py @@ -7,4 +7,3 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Migration archive files from old export versions to newer ones.""" diff --git a/aiida/storage/sqlite_zip/migrations/env.py b/aiida/storage/sqlite_zip/migrations/env.py new file mode 100644 index 0000000000..2ee03a00b2 --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/env.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Upper level SQLAlchemy migration funcitons.""" +from alembic import context + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + The connection should have been passed to the config, which we use to configue the migration context. 
+ """ + from aiida.storage.sqlite_zip.models import SqliteBase + + config = context.config # pylint: disable=no-member + + connection = config.attributes.get('connection', None) + aiida_profile = config.attributes.get('aiida_profile', None) + on_version_apply = config.attributes.get('on_version_apply', None) + + if connection is None: + from aiida.common.exceptions import ConfigurationError + raise ConfigurationError('An initialized connection is expected for the AiiDA online migrations.') + + context.configure( # pylint: disable=no-member + connection=connection, + target_metadata=SqliteBase.metadata, + transaction_per_migration=True, + aiida_profile=aiida_profile, + on_version_apply=on_version_apply + ) + + context.run_migrations() # pylint: disable=no-member + + +try: + if context.is_offline_mode(): # pylint: disable=no-member + NotImplementedError('This feature is not currently supported.') + + run_migrations_online() +except NameError: + # This will occur in an environment that is just compiling the documentation + pass diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/__init__.py b/aiida/storage/sqlite_zip/migrations/legacy/__init__.py similarity index 94% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/__init__.py rename to aiida/storage/sqlite_zip/migrations/legacy/__init__.py index 5190ad4d96..f46a36c0bd 100644 --- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/__init__.py +++ b/aiida/storage/sqlite_zip/migrations/legacy/__init__.py @@ -24,8 +24,7 @@ from .v11_to_v12 import migrate_v11_to_v12 # version from -> version to, function which modifies metadata, data in-place -_vtype = Dict[str, Tuple[str, Callable[[dict, dict], None]]] -LEGACY_MIGRATE_FUNCTIONS: _vtype = { +LEGACY_MIGRATE_FUNCTIONS: Dict[str, Tuple[str, Callable[[dict, dict], None]]] = { '0.4': ('0.5', migrate_v4_to_v5), '0.5': ('0.6', migrate_v5_to_v6), '0.6': ('0.7', migrate_v6_to_v7), diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v04_to_v05.py b/aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py similarity index 75% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v04_to_v05.py rename to aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py index 2e872db20f..17402b4e85 100644 --- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v04_to_v05.py +++ b/aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py @@ -24,7 +24,27 @@ Where id is a SQLA id and migration-name is the name of the particular migration. """ # pylint: disable=invalid-name -from ..utils import remove_fields, update_metadata, verify_metadata_version # pylint: disable=no-name-in-module +from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module + + +def remove_fields(metadata, data, entities, fields): + """Remove fields under entities from data.json and metadata.json. 
+ + :param metadata: the content of an export archive metadata.json file + :param data: the content of an export archive data.json file + :param entities: list of ORM entities + :param fields: list of fields to be removed from the export archive files + """ + # data.json + for entity in entities: + for content in data['export_data'].get(entity, {}).values(): + for field in fields: + content.pop(field, None) + + # metadata.json + for entity in entities: + for field in fields: + metadata['all_fields_info'][entity].pop(field, None) def migration_drop_node_columns_nodeversion_public(metadata, data): diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v05_to_v06.py b/aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py similarity index 98% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v05_to_v06.py rename to aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py index 3f6a7ea9c5..934c03d4c7 100644 --- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v05_to_v06.py +++ b/aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py @@ -31,7 +31,7 @@ def migrate_deserialized_datetime(data, conversion): """Deserialize datetime strings from export archives, meaning to reattach the UTC timezone information.""" - from aiida.tools.archive.exceptions import ArchiveMigrationError + from aiida.common.exceptions import StorageMigrationError ret_data: Union[str, dict, list] @@ -62,7 +62,7 @@ def migrate_deserialized_datetime(data, conversion): # Since we know that all strings will be UTC, here we are simply reattaching that information. ret_data = f'{data}+00:00' else: - raise ArchiveMigrationError(f"Unknown convert_type '{conversion}'") + raise StorageMigrationError(f"Unknown convert_type '{conversion}'") return ret_data diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v06_to_v07.py b/aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py similarity index 92% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v06_to_v07.py rename to aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py index 4b764140f6..c76d2f8e0c 100644 --- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v06_to_v07.py +++ b/aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py @@ -46,14 +46,14 @@ def data_migration_legacy_process_attributes(data): `process_state` attribute. If they have it, it is checked whether the state is active or not, if not, the `sealed` attribute is created and set to `True`. - :raises `~aiida.tools.archive.exceptions.CorruptArchive`: if a Node, found to have attributes, + :raises `~aiida.common.exceptions.CorruptStorage`: if a Node, found to have attributes, cannot be found in the list of exported entities. - :raises `~aiida.tools.archive.exceptions.CorruptArchive`: if the 'sealed' attribute does not exist and + :raises `~aiida.common.exceptions.CorruptStorage`: if the 'sealed' attribute does not exist and the ProcessNode is in an active state, i.e. `process_state` is one of ('created', 'running', 'waiting'). A log-file, listing all illegal ProcessNodes, will be produced in the current directory. 
""" + from aiida.common.exceptions import CorruptStorage from aiida.storage.psql_dos.migrations.utils.integrity import write_database_integrity_violation - from aiida.tools.archive.exceptions import CorruptArchive attrs_to_remove = ['_sealed', '_finished', '_failed', '_aborted', '_do_abort'] active_states = {'created', 'running', 'waiting'} @@ -68,7 +68,7 @@ def data_migration_legacy_process_attributes(data): if process_state in active_states: # The ProcessNode is in an active state, and should therefore never have been allowed # to be exported. The Node will be added to a log that is saved in the working directory, - # then a CorruptArchive will be raised, since the archive needs to be migrated manually. + # then a CorruptStorage will be raised, since the archive needs to be migrated manually. uuid_pk = data['export_data']['Node'][node_pk].get('uuid', node_pk) illegal_cases.append([uuid_pk, process_state]) continue # No reason to do more now @@ -81,7 +81,7 @@ def data_migration_legacy_process_attributes(data): for attr in attrs_to_remove: content.pop(attr, None) except KeyError as exc: - raise CorruptArchive(f'Your export archive is corrupt! Org. exception: {exc}') + raise CorruptStorage(f'Your export archive is corrupt! Org. exception: {exc}') if illegal_cases: headers = ['UUID/PK', 'process_state'] @@ -89,7 +89,7 @@ def data_migration_legacy_process_attributes(data): 'that should never have been allowed to be exported.' write_database_integrity_violation(illegal_cases, headers, warning_message) - raise CorruptArchive( + raise CorruptStorage( 'Your export archive is corrupt! ' 'Please see the log-file in your current directory for more details.' ) diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v07_to_v08.py b/aiida/storage/sqlite_zip/migrations/legacy/v07_to_v08.py similarity index 100% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v07_to_v08.py rename to aiida/storage/sqlite_zip/migrations/legacy/v07_to_v08.py diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v08_to_v09.py b/aiida/storage/sqlite_zip/migrations/legacy/v08_to_v09.py similarity index 100% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v08_to_v09.py rename to aiida/storage/sqlite_zip/migrations/legacy/v08_to_v09.py diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v09_to_v10.py b/aiida/storage/sqlite_zip/migrations/legacy/v09_to_v10.py similarity index 100% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v09_to_v10.py rename to aiida/storage/sqlite_zip/migrations/legacy/v09_to_v10.py diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v10_to_v11.py b/aiida/storage/sqlite_zip/migrations/legacy/v10_to_v11.py similarity index 100% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v10_to_v11.py rename to aiida/storage/sqlite_zip/migrations/legacy/v10_to_v11.py diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v11_to_v12.py b/aiida/storage/sqlite_zip/migrations/legacy/v11_to_v12.py similarity index 100% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v11_to_v12.py rename to aiida/storage/sqlite_zip/migrations/legacy/v11_to_v12.py diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy_to_new.py b/aiida/storage/sqlite_zip/migrations/legacy_to_main.py similarity index 62% rename from aiida/tools/archive/implementations/sqlite/migrations/legacy_to_new.py rename to 
aiida/storage/sqlite_zip/migrations/legacy_to_main.py index c770e9f233..27566bccc1 100644 --- a/aiida/tools/archive/implementations/sqlite/migrations/legacy_to_new.py +++ b/aiida/storage/sqlite_zip/migrations/legacy_to_main.py @@ -7,11 +7,10 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Migration from legacy JSON format.""" +"""Migration from the "legacy" JSON format, to an sqlite database, and node uuid based repository to hash based.""" from contextlib import contextmanager from datetime import datetime from hashlib import sha256 -import json from pathlib import Path, PurePosixPath import shutil import tarfile @@ -21,14 +20,14 @@ from sqlalchemy import insert, select from sqlalchemy.exc import IntegrityError +from aiida.common.exceptions import CorruptStorage, StorageMigrationError from aiida.common.hashing import chunked_file_hash from aiida.common.progress_reporter import get_progress_reporter from aiida.repository.common import File, FileType -from aiida.tools.archive.common import MIGRATE_LOGGER, batch_iter -from aiida.tools.archive.exceptions import CorruptArchive, MigrationValidationError +from aiida.storage.log import MIGRATE_LOGGER -from . import v1_db_schema as db -from ..common import DB_FILENAME, META_FILENAME, REPO_FOLDER, create_sqla_engine +from . import v1_db_schema as v1_schema +from ..utils import DB_FILENAME, REPO_FOLDER, create_sqla_engine from .utils import update_metadata _NODE_ENTITY_NAME = 'Node' @@ -57,18 +56,26 @@ } aiida_orm_to_backend = { - _USER_ENTITY_NAME: db.DbUser, - _GROUP_ENTITY_NAME: db.DbGroup, - _NODE_ENTITY_NAME: db.DbNode, - _COMMENT_ENTITY_NAME: db.DbComment, - _COMPUTER_ENTITY_NAME: db.DbComputer, - _LOG_ENTITY_NAME: db.DbLog, + _USER_ENTITY_NAME: v1_schema.DbUser, + _GROUP_ENTITY_NAME: v1_schema.DbGroup, + _NODE_ENTITY_NAME: v1_schema.DbNode, + _COMMENT_ENTITY_NAME: v1_schema.DbComment, + _COMPUTER_ENTITY_NAME: v1_schema.DbComputer, + _LOG_ENTITY_NAME: v1_schema.DbLog, } +LEGACY_TO_MAIN_REVISION = 'main_0000' + def perform_v1_migration( # pylint: disable=too-many-locals - inpath: Path, working: Path, archive_name: str, is_tar: bool, metadata: dict, data: dict, compression: int -) -> str: + inpath: Path, + working: Path, + new_zip: ZipPath, + central_dir: Dict[str, Any], + is_tar: bool, + metadata: dict, + data: dict, +) -> Path: """Perform the repository and JSON to SQLite migration. 1. 
Iterate though the repository paths in the archive @@ -78,10 +85,11 @@ def perform_v1_migration( # pylint: disable=too-many-locals :param inpath: the input path to the old archive :param metadata: the metadata to migrate :param data: the data to migrate + + :returns:the path to the sqlite database file """ MIGRATE_LOGGER.report('Initialising new archive...') node_repos: Dict[str, List[Tuple[str, Optional[str]]]] = {} - central_dir: Dict[str, Any] = {} if is_tar: # we cannot stream from a tar file performantly, so we extract it to disk first @contextmanager @@ -95,65 +103,56 @@ def in_archive_context(_inpath): shutil.rmtree(temp_folder) else: in_archive_context = ZipPath # type: ignore - with ZipPath( - working / archive_name, - mode='w', - compresslevel=compression, - name_to_info=central_dir, - info_order=(META_FILENAME, DB_FILENAME) - ) as new_path: - with in_archive_context(inpath) as path: - length = sum(1 for _ in path.glob('**/*')) - base_parts = len(path.parts) - with get_progress_reporter()(desc='Converting repo', total=length) as progress: - for subpath in path.glob('**/*'): - progress.update() - parts = subpath.parts[base_parts:] - # repository file are stored in the legacy archive as `nodes/uuid[0:2]/uuid[2:4]/uuid[4:]/path/...` - if len(parts) < 6 or parts[0] != 'nodes' or parts[4] not in ('raw_input', 'path'): - continue - uuid = ''.join(parts[1:4]) - posix_rel = PurePosixPath(*parts[5:]) - hashkey = None - if subpath.is_file(): + + with in_archive_context(inpath) as path: + length = sum(1 for _ in path.glob('**/*')) + base_parts = len(path.parts) + with get_progress_reporter()(desc='Converting repo', total=length) as progress: + for subpath in path.glob('**/*'): + progress.update() + parts = subpath.parts[base_parts:] + # repository file are stored in the legacy archive as `nodes/uuid[0:2]/uuid[2:4]/uuid[4:]/path/...` + if len(parts) < 6 or parts[0] != 'nodes' or parts[4] not in ('raw_input', 'path'): + continue + uuid = ''.join(parts[1:4]) + posix_rel = PurePosixPath(*parts[5:]) + hashkey = None + if subpath.is_file(): + with subpath.open('rb') as handle: + hashkey = chunked_file_hash(handle, sha256) + if f'{REPO_FOLDER}/{hashkey}' not in central_dir: with subpath.open('rb') as handle: - hashkey = chunked_file_hash(handle, sha256) - if f'{REPO_FOLDER}/{hashkey}' not in central_dir: - with subpath.open('rb') as handle: - with (new_path / f'{REPO_FOLDER}/{hashkey}').open(mode='wb') as handle2: - shutil.copyfileobj(handle, handle2) - node_repos.setdefault(uuid, []).append((posix_rel.as_posix(), hashkey)) - MIGRATE_LOGGER.report(f'Unique files written: {len(central_dir)}') - - _json_to_sqlite(working / DB_FILENAME, data, node_repos) - - MIGRATE_LOGGER.report('Finalising archive') - with (working / DB_FILENAME).open('rb') as handle: - with (new_path / DB_FILENAME).open(mode='wb') as handle2: - shutil.copyfileobj(handle, handle2) - - # remove legacy keys from metadata and store - metadata.pop('unique_identifiers', None) - metadata.pop('all_fields_info', None) - # remove legacy key nesting - metadata['creation_parameters'] = metadata.pop('export_parameters', {}) - metadata['compression'] = compression - metadata['key_format'] = 'sha256' - metadata['mtime'] = datetime.now().isoformat() - update_metadata(metadata, '1.0') - (new_path / META_FILENAME).write_text(json.dumps(metadata)) - - return '1.0' - - -def _json_to_sqlite( + with (new_zip / f'{REPO_FOLDER}/{hashkey}').open(mode='wb') as handle2: + shutil.copyfileobj(handle, handle2) + node_repos.setdefault(uuid, 
[]).append((posix_rel.as_posix(), hashkey)) + MIGRATE_LOGGER.report(f'Unique repository files written: {len(central_dir)}') + + # convert the JSON database to SQLite + _json_to_sqlite(working / DB_FILENAME, data, node_repos) + + # remove legacy keys from metadata and store + metadata.pop('unique_identifiers', None) + metadata.pop('all_fields_info', None) + # remove legacy key nesting + metadata['creation_parameters'] = metadata.pop('export_parameters', {}) + metadata['key_format'] = 'sha256' + + # update the version in the metadata + update_metadata(metadata, LEGACY_TO_MAIN_REVISION) + + return working / DB_FILENAME + + +def _json_to_sqlite( # pylint: disable=too-many-branches,too-many-locals outpath: Path, data: dict, node_repos: Dict[str, List[Tuple[str, Optional[str]]]], batch_size: int = 100 ) -> None: """Convert a JSON archive format to SQLite.""" + from aiida.tools.archive.common import batch_iter + MIGRATE_LOGGER.report('Converting DB to SQLite') engine = create_sqla_engine(outpath) - db.ArchiveV1Base.metadata.create_all(engine) + v1_schema.ArchiveV1Base.metadata.create_all(engine) with engine.begin() as connection: # proceed in order of relationships @@ -168,10 +167,11 @@ def _json_to_sqlite( with get_progress_reporter()(desc=f'Adding {entity_type}s', total=length) as progress: for nrows, rows in batch_iter(_iter_entity_fields(data, entity_type, node_repos), batch_size): # to-do check for unused keys? + # to-do handle null values? try: connection.execute(insert(backend_cls.__table__), rows) # type: ignore except IntegrityError as exc: - raise MigrationValidationError(f'Database integrity error: {exc}') from exc + raise StorageMigrationError(f'Database integrity error: {exc}') from exc progress.update(nrows) if not (data['groups_uuid'] or data['links_uuid']): @@ -180,43 +180,59 @@ def _json_to_sqlite( with engine.begin() as connection: # get mapping of node IDs to node UUIDs - node_uuid_map = {uuid: pk for uuid, pk in connection.execute(select(db.DbNode.uuid, db.DbNode.id))} # pylint: disable=unnecessary-comprehension + node_uuid_map = { + uuid: pk for uuid, pk in connection.execute(select(v1_schema.DbNode.uuid, v1_schema.DbNode.id)) # pylint: disable=unnecessary-comprehension + } # links if data['links_uuid']: def _transform_link(link_row): + try: + input_id = node_uuid_map[link_row['input']] + except KeyError: + raise StorageMigrationError(f'Database contains link with unknown input node: {link_row}') + try: + output_id = node_uuid_map[link_row['output']] + except KeyError: + raise StorageMigrationError(f'Database contains link with unknown output node: {link_row}') return { - 'input_id': node_uuid_map[link_row['input']], - 'output_id': node_uuid_map[link_row['output']], + 'input_id': input_id, + 'output_id': output_id, 'label': link_row['label'], 'type': link_row['type'] } with get_progress_reporter()(desc='Adding Links', total=len(data['links_uuid'])) as progress: for nrows, rows in batch_iter(data['links_uuid'], batch_size, transform=_transform_link): - connection.execute(insert(db.DbLink.__table__), rows) + connection.execute(insert(v1_schema.DbLink.__table__), rows) progress.update(nrows) # groups to nodes if data['groups_uuid']: # get mapping of node IDs to node UUIDs - group_uuid_map = {uuid: pk for uuid, pk in connection.execute(select(db.DbGroup.uuid, db.DbGroup.id))} # pylint: disable=unnecessary-comprehension + group_uuid_map = { + uuid: pk for uuid, pk in connection.execute(select(v1_schema.DbGroup.uuid, v1_schema.DbGroup.id)) # pylint: 
disable=unnecessary-comprehension + } length = sum(len(uuids) for uuids in data['groups_uuid'].values()) + unknown_nodes: Dict[str, set] = {} with get_progress_reporter()(desc='Adding Group-Nodes', total=length) as progress: for group_uuid, node_uuids in data['groups_uuid'].items(): group_id = group_uuid_map[group_uuid] - connection.execute( - insert(db.DbGroupNodes.__table__), [{ - 'dbnode_id': node_uuid_map[uuid], - 'dbgroup_id': group_id - } for uuid in node_uuids] - ) + rows = [] + for uuid in node_uuids: + if uuid in node_uuid_map: + rows.append({'dbnode_id': node_uuid_map[uuid], 'dbgroup_id': group_id}) + else: + unknown_nodes.setdefault(group_uuid, set()).add(uuid) + connection.execute(insert(v1_schema.DbGroupNodes.__table__), rows) progress.update(len(node_uuids)) + if unknown_nodes: + MIGRATE_LOGGER.warning(f'Dropped unknown nodes in groups: {unknown_nodes}') def _convert_datetime(key, value): - if key in ('time', 'ctime', 'mtime'): + if key in ('time', 'ctime', 'mtime') and value is not None: return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%f') return value @@ -234,9 +250,9 @@ def _iter_entity_fields( extras = data.get('node_extras', {}) for pk, all_fields in data['export_data'].get(name, {}).items(): if pk not in attributes: - raise CorruptArchive(f'Unable to find attributes info for Node with Pk={pk}') + raise CorruptStorage(f'Unable to find attributes info for Node with Pk={pk}') if pk not in extras: - raise CorruptArchive(f'Unable to find extra info for Node with Pk={pk}') + raise CorruptStorage(f'Unable to find extra info for Node with Pk={pk}') uuid = all_fields['uuid'] repository_metadata = _create_repo_metadata(node_repos[uuid]) if uuid in node_repos else {} yield { diff --git a/aiida/storage/sqlite_zip/migrations/script.py.mako b/aiida/storage/sqlite_zip/migrations/script.py.mako new file mode 100644 index 0000000000..b0e41c2687 --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + """Migrations for the upgrade.""" + ${upgrades if upgrades else "pass"} + + +def downgrade(): + """Migrations for the downgrade.""" + ${downgrades if downgrades else "pass"} diff --git a/aiida/tools/archive/implementations/sqlite/common.py b/aiida/storage/sqlite_zip/migrations/utils.py similarity index 79% rename from aiida/tools/archive/implementations/sqlite/common.py rename to aiida/storage/sqlite_zip/migrations/utils.py index a375cf7c26..dfd72ec6ca 100644 --- a/aiida/tools/archive/implementations/sqlite/common.py +++ b/aiida/storage/sqlite_zip/migrations/utils.py @@ -12,41 +12,55 @@ from pathlib import Path import shutil import tempfile -from typing import Callable, Sequence, Union +from typing import Callable, Sequence from archive_path import TarPath, ZipPath -from sqlalchemy import event -from sqlalchemy.future.engine import Engine, create_engine -from aiida.common import json +from aiida.common import exceptions from aiida.common.progress_reporter import create_callback, get_progress_reporter -META_FILENAME = 'metadata.json' -DB_FILENAME = 'db.sqlite3' -# folder to store repository files in -REPO_FOLDER = 'repo' - - -def sqlite_enforce_foreign_keys(dbapi_connection, _): - """Enforce foreign key constraints, when using sqlite backend (off by default)""" - cursor = dbapi_connection.cursor() - cursor.execute('PRAGMA foreign_keys=ON;') - cursor.close() - - -def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine: - """Create a new engine instance.""" - engine = create_engine( - f'sqlite:///{path}', - json_serializer=json.dumps, - json_deserializer=json.loads, - encoding='utf-8', - future=True, - **kwargs - ) - if enforce_foreign_keys: - event.listen(engine, 'connect', sqlite_enforce_foreign_keys) - return engine + +def update_metadata(metadata, version): + """Update the metadata with a new version number and a notification of the conversion that was executed. + + :param metadata: the content of an export archive metadata.json file + :param version: string version number that the updated metadata should get + """ + from aiida import get_version + + old_version = metadata['export_version'] + conversion_info = metadata.get('conversion_info', []) + + conversion_message = f'Converted from version {old_version} to {version} with AiiDA v{get_version()}' + conversion_info.append(conversion_message) + + metadata['aiida_version'] = get_version() + metadata['export_version'] = version + metadata['conversion_info'] = conversion_info + + +def verify_metadata_version(metadata, version=None): + """Utility function to verify that the metadata has the correct version number. + + If no version number is passed, it will just extract the version number and return it. 
+ + :param metadata: the content of an export archive metadata.json file + :param version: string version number that the metadata is expected to have + """ + try: + metadata_version = metadata['export_version'] + except KeyError: + raise exceptions.StorageMigrationError("metadata is missing the 'export_version' key") + + if version is None: + return metadata_version + + if metadata_version != version: + raise exceptions.StorageMigrationError( + f'expected archive file with version {version} but found version {metadata_version}' + ) + + return None def copy_zip_to_zip( diff --git a/aiida/storage/sqlite_zip/migrations/v1_db_schema.py b/aiida/storage/sqlite_zip/migrations/v1_db_schema.py new file mode 100644 index 0000000000..bad4f14ac0 --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/v1_db_schema.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""This is the sqlite DB schema, coresponding to the `main_0000` revision of the `sqlite_zip` backend, +see: `versions/main_0000_initial.py` + +For normal operation of the archive, +we auto-generate the schema from the models in ``aiida.storage.psql_dos.models``. +However, when migrating an archive from the old format, we require a fixed revision of the schema. + +The only difference between the PostGreSQL schema and SQLite one, +is the replacement of ``JSONB`` with ``JSON``, and ``UUID`` with ``CHAR(32)``. 
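To make the substitution concrete, the same logical column is sketched below once with the PostgreSQL types and once with the SQLite replacements; the table and column names are illustrative only, not part of the schema in this file.

# Side-by-side sketch of the dialect-specific types; names are illustrative.
from sqlalchemy import Column, Integer, MetaData, Table
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.dialects.sqlite import JSON
from sqlalchemy.types import CHAR

pg_meta = MetaData()
Table(
    'example_node_pg', pg_meta,
    Column('id', Integer, primary_key=True),
    Column('uuid', UUID(as_uuid=True), unique=True),  # native PostgreSQL UUID
    Column('attributes', JSONB),                      # binary JSON with index support
)

sqlite_meta = MetaData()
Table(
    'example_node_sqlite', sqlite_meta,
    Column('id', Integer, primary_key=True),
    Column('uuid', CHAR(32), unique=True),            # UUID stored as a 32-character hex string
    Column('attributes', JSON),                       # plain JSON, as in the schema in this file
)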
+""" +from sqlalchemy import ForeignKey, MetaData, orm +from sqlalchemy.dialects.sqlite import JSON +from sqlalchemy.schema import Column, UniqueConstraint +from sqlalchemy.types import CHAR, Boolean, DateTime, Integer, String, Text + +from aiida.common import timezone +from aiida.common.utils import get_new_uuid + +# see https://alembic.sqlalchemy.org/en/latest/naming.html +naming_convention = ( + ('pk', '%(table_name)s_pkey'), + ('ix', 'ix_%(table_name)s_%(column_0_N_label)s'), + ('uq', 'uq_%(table_name)s_%(column_0_N_name)s'), + ('ck', 'ck_%(table_name)s_%(constraint_name)s'), + ('fk', 'fk_%(table_name)s_%(column_0_N_name)s_%(referred_table_name)s'), +) + +ArchiveV1Base = orm.declarative_base(metadata=MetaData(naming_convention=dict(naming_convention))) + + +class DbAuthInfo(ArchiveV1Base): + """Class that keeps the authentication data.""" + + __tablename__ = 'db_dbauthinfo' + __table_args__ = (UniqueConstraint('aiidauser_id', 'dbcomputer_id'),) + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + aiidauser_id = Column( + Integer, + ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), + nullable=True, + index=True + ) + dbcomputer_id = Column( + Integer, + ForeignKey('db_dbcomputer.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), + nullable=True, + index=True + ) + _metadata = Column('metadata', JSON, default=dict, nullable=True) + auth_params = Column(JSON, default=dict, nullable=True) + enabled = Column(Boolean, default=True, nullable=True) + + +class DbComment(ArchiveV1Base): + """Class to store comments.""" + + __tablename__ = 'db_dbcomment' + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True) + dbnode_id = Column( + Integer, + ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), + nullable=True, + index=True + ) + ctime = Column(DateTime(timezone=True), default=timezone.now, nullable=True) + mtime = Column(DateTime(timezone=True), default=timezone.now, nullable=True) + user_id = Column( + Integer, + ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), + nullable=True, + index=True + ) + content = Column(Text, default='', nullable=True) + + +class DbComputer(ArchiveV1Base): + """Class to store computers.""" + __tablename__ = 'db_dbcomputer' + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True) + label = Column(String(255), unique=True, nullable=False) + hostname = Column(String(255), default='', nullable=True) + description = Column(Text, default='', nullable=True) + scheduler_type = Column(String(255), default='', nullable=True) + transport_type = Column(String(255), default='', nullable=True) + _metadata = Column('metadata', JSON, default=dict, nullable=True) + + +class DbGroupNodes(ArchiveV1Base): + """Class to store join table for group -> nodes.""" + + __tablename__ = 'db_dbgroup_dbnodes' + __table_args__ = (UniqueConstraint('dbgroup_id', 'dbnode_id'),) + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + dbnode_id = Column( + Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'), nullable=False, index=True + ) + dbgroup_id = Column( + Integer, ForeignKey('db_dbgroup.id', deferrable=True, initially='DEFERRED'), nullable=False, index=True + ) + + +class DbGroup(ArchiveV1Base): + """Class to store 
groups.""" + + __tablename__ = 'db_dbgroup' + __table_args__ = (UniqueConstraint('label', 'type_string'),) + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True) + label = Column(String(255), nullable=False, index=True) + type_string = Column(String(255), default='', nullable=True, index=True) + time = Column(DateTime(timezone=True), default=timezone.now, nullable=True) + description = Column(Text, default='', nullable=True) + extras = Column(JSON, default=dict, nullable=False) + user_id = Column( + Integer, + ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), + nullable=False, + index=True + ) + + +class DbLog(ArchiveV1Base): + """Class to store logs.""" + + __tablename__ = 'db_dblog' + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True) + time = Column(DateTime(timezone=True), default=timezone.now, nullable=True) + loggername = Column(String(255), default='', nullable=True, index=True) + levelname = Column(String(50), default='', nullable=True, index=True) + dbnode_id = Column( + Integer, + ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED', ondelete='CASCADE'), + nullable=False, + index=True + ) + message = Column(Text(), default='', nullable=True) + _metadata = Column('metadata', JSON, default=dict, nullable=True) + + +class DbNode(ArchiveV1Base): + """Class to store nodes.""" + + __tablename__ = 'db_dbnode' + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True) + node_type = Column(String(255), default='', nullable=False, index=True) + process_type = Column(String(255), index=True) + label = Column(String(255), default='', index=True, nullable=True) + description = Column(Text(), default='', nullable=True) + ctime = Column(DateTime(timezone=True), default=timezone.now, nullable=True, index=True) + mtime = Column(DateTime(timezone=True), default=timezone.now, nullable=True, index=True) + attributes = Column(JSON) + extras = Column(JSON) + repository_metadata = Column(JSON, nullable=False, default=dict, server_default='{}') + dbcomputer_id = Column( + Integer, + ForeignKey('db_dbcomputer.id', deferrable=True, initially='DEFERRED', ondelete='RESTRICT'), + nullable=True, + index=True + ) + user_id = Column( + Integer, + ForeignKey('db_dbuser.id', deferrable=True, initially='DEFERRED', ondelete='restrict'), + nullable=False, + index=True + ) + + +class DbLink(ArchiveV1Base): + """Class to store links between nodes.""" + + __tablename__ = 'db_dblink' + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + input_id = Column( + Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'), nullable=False, index=True + ) + output_id = Column( + Integer, + ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), + nullable=False, + index=True + ) + label = Column(String(255), default='', nullable=False, index=True) + type = Column(String(255), nullable=False, index=True) + + +class DbUser(ArchiveV1Base): + """Class to store users.""" + + __tablename__ = 'db_dbuser' + + id = Column(Integer, primary_key=True) # pylint: disable=invalid-name + email = Column(String(254), nullable=False, unique=True) + first_name = Column(String(254), default='', nullable=True) + last_name = Column(String(254), 
default='', nullable=True) + institution = Column(String(254), default='', nullable=True) diff --git a/aiida/storage/sqlite_zip/migrations/versions/__init__.py b/aiida/storage/sqlite_zip/migrations/versions/__init__.py new file mode 100644 index 0000000000..2776a55f97 --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/versions/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0000_initial.py b/aiida/storage/sqlite_zip/migrations/versions/main_0000_initial.py new file mode 100644 index 0000000000..d45772daaa --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/versions/main_0000_initial.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,no-member +"""Initial main branch schema + +This schema is mainly equivalent to the `main_0001` schema of the `psql_dos` backend. +The difference are: + +1. Data types: the replacement of ``JSONB`` with ``JSON``, and ``UUID`` with ``CHAR(32)``. +2. Some more fields are nullable, to allow migrations from legacy to main. + The nullable fields are then filled with default values, and set to non-nullable, in subsequent migrations. 
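The nullable-then-tighten approach described above can be compressed into a single-column sketch; the table and column below are hypothetical, and each function body belongs in the upgrade() of its own Alembic revision, mirroring the main_0000 / main_0000a / main_0000b steps.

# Single-column sketch of the three-step pattern; 'db_example'/'description'
# are hypothetical, and each step belongs in its own Alembic revision.
from alembic import op
import sqlalchemy as sa


def create_nullable():
    # main_0000-style: create the column nullable, so legacy rows can be inserted as-is
    op.create_table(
        'db_example',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('description', sa.Text(), nullable=True),
    )


def fill_defaults():
    # main_0000a-style: replace any remaining NULLs with the default value
    table = sa.sql.table('db_example', sa.sql.column('description', sa.Text))
    op.execute(table.update().where(table.c.description.is_(None)).values(description=''))


def make_non_nullable():
    # main_0000b-style: tighten the column; batch mode is needed for ALTER on SQLite
    with op.batch_alter_table('db_example') as batch_op:
        batch_op.alter_column('description', existing_type=sa.Text(), nullable=False)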
+ +Revision ID: main_0000 +Revises: +Create Date: 2021-02-02 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.sqlite import JSON + +revision = 'main_0000' +down_revision = None +branch_labels = ('main',) +depends_on = None + + +def upgrade(): + """Migrations for the upgrade.""" + op.create_table( + 'db_dbcomputer', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True), + sa.Column('label', sa.String(length=255), nullable=False, unique=True), + sa.Column('hostname', sa.String(length=255), nullable=False), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('scheduler_type', sa.String(length=255), nullable=False), + sa.Column('transport_type', sa.String(length=255), nullable=False), + sa.Column('metadata', JSON(), nullable=False), + ) + op.create_table( + 'db_dbuser', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('email', sa.String(length=254), nullable=False, unique=True), + sa.Column('first_name', sa.String(length=254), nullable=False), + sa.Column('last_name', sa.String(length=254), nullable=False), + sa.Column('institution', sa.String(length=254), nullable=False), + ) + op.create_table( + 'db_dbauthinfo', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('aiidauser_id', sa.Integer(), nullable=False, index=True), + sa.Column('dbcomputer_id', sa.Integer(), nullable=False, index=True), + sa.Column('metadata', JSON(), nullable=False), + sa.Column('auth_params', JSON(), nullable=False), + sa.Column('enabled', sa.Boolean(), nullable=False), + sa.ForeignKeyConstraint( + ['aiidauser_id'], + ['db_dbuser.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + sa.ForeignKeyConstraint( + ['dbcomputer_id'], + ['db_dbcomputer.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + sa.UniqueConstraint('aiidauser_id', 'dbcomputer_id'), + ) + op.create_table( + 'db_dbgroup', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True), + sa.Column('label', sa.String(length=255), nullable=False, index=True), + sa.Column('type_string', sa.String(length=255), nullable=False, index=True), + sa.Column('time', sa.DateTime(timezone=True), nullable=False), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('extras', JSON(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False, index=True), + sa.ForeignKeyConstraint( + ['user_id'], + ['db_dbuser.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + sa.UniqueConstraint('label', 'type_string'), + ) + + op.create_table( + 'db_dbnode', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True), + sa.Column('node_type', sa.String(length=255), nullable=False, index=True), + sa.Column('process_type', sa.String(length=255), nullable=True, index=True), + sa.Column('label', sa.String(length=255), nullable=False, index=True), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('ctime', sa.DateTime(timezone=True), nullable=False, index=True), + sa.Column('mtime', sa.DateTime(timezone=True), nullable=False, index=True), + sa.Column('attributes', JSON(), nullable=True), + sa.Column('extras', JSON(), nullable=True), + sa.Column('repository_metadata', JSON(), nullable=False), + sa.Column('dbcomputer_id', sa.Integer(), nullable=True, index=True), + 
sa.Column('user_id', sa.Integer(), nullable=False, index=True), + sa.ForeignKeyConstraint( + ['dbcomputer_id'], + ['db_dbcomputer.id'], + ondelete='RESTRICT', + initially='DEFERRED', + deferrable=True, + ), + sa.ForeignKeyConstraint( + ['user_id'], + ['db_dbuser.id'], + ondelete='restrict', + initially='DEFERRED', + deferrable=True, + ), + ) + + op.create_table( + 'db_dbcomment', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True), + sa.Column('dbnode_id', sa.Integer(), nullable=False, index=True), + sa.Column('ctime', sa.DateTime(timezone=True), nullable=False), + sa.Column('mtime', sa.DateTime(timezone=True), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False, index=True), + sa.Column('content', sa.Text(), nullable=False), + sa.ForeignKeyConstraint( + ['dbnode_id'], + ['db_dbnode.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + sa.ForeignKeyConstraint( + ['user_id'], + ['db_dbuser.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + ) + + op.create_table( + 'db_dbgroup_dbnodes', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('dbnode_id', sa.Integer(), nullable=False, index=True), + sa.Column('dbgroup_id', sa.Integer(), nullable=False, index=True), + sa.ForeignKeyConstraint(['dbgroup_id'], ['db_dbgroup.id'], initially='DEFERRED', deferrable=True), + sa.ForeignKeyConstraint(['dbnode_id'], ['db_dbnode.id'], initially='DEFERRED', deferrable=True), + sa.UniqueConstraint('dbgroup_id', 'dbnode_id'), + ) + op.create_table( + 'db_dblink', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('input_id', sa.Integer(), nullable=False, index=True), + sa.Column('output_id', sa.Integer(), nullable=False, index=True), + sa.Column('label', sa.String(length=255), nullable=False, index=True), + sa.Column('type', sa.String(length=255), nullable=False, index=True), + sa.ForeignKeyConstraint(['input_id'], ['db_dbnode.id'], initially='DEFERRED', deferrable=True), + sa.ForeignKeyConstraint( + ['output_id'], + ['db_dbnode.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + ) + + op.create_table( + 'db_dblog', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True), + sa.Column('time', sa.DateTime(timezone=True), nullable=False), + sa.Column('loggername', sa.String(length=255), nullable=False, index=True), + sa.Column('levelname', sa.String(length=50), nullable=False, index=True), + sa.Column('dbnode_id', sa.Integer(), nullable=False, index=True), + sa.Column('message', sa.Text(), nullable=False), + sa.Column('metadata', JSON(), nullable=False), + sa.ForeignKeyConstraint( + ['dbnode_id'], + ['db_dbnode.id'], + ondelete='CASCADE', + initially='DEFERRED', + deferrable=True, + ), + ) + + +def downgrade(): + """Migrations for the downgrade.""" + raise NotImplementedError('Downgrade of main_0000.') diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0000a_replace_nulls.py b/aiida/storage/sqlite_zip/migrations/versions/main_0000a_replace_nulls.py new file mode 100644 index 0000000000..7d5fa87463 --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/versions/main_0000a_replace_nulls.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,no-member +"""Replace null values with defaults + +Revision ID: main_0000a +Revises: main_0000 +Create Date: 2022-03-04 + +""" +from alembic import op +import sqlalchemy as sa + +from aiida.common import timezone + +# revision identifiers, used by Alembic. +revision = 'main_0000a' +down_revision = 'main_0000' +branch_labels = None +depends_on = None + + +def upgrade(): # pylint: disable=too-many-statements + """Convert null values to default values. + + This migration is performed in preparation for the next migration, + which will make these fields non-nullable. + """ + db_dbauthinfo = sa.sql.table( + 'db_dbauthinfo', + sa.sql.column('aiidauser_id', sa.Integer), + sa.sql.column('dbcomputer_id', sa.Integer), + sa.Column('enabled', sa.Boolean), + sa.Column('auth_params', sa.JSON), + sa.Column('metadata', sa.JSON()), + ) + + # remove rows with null values, which may have previously resulted from deletion of a user or computer + op.execute(db_dbauthinfo.delete().where(db_dbauthinfo.c.aiidauser_id.is_(None))) # type: ignore[arg-type] + op.execute(db_dbauthinfo.delete().where(db_dbauthinfo.c.dbcomputer_id.is_(None))) # type: ignore[arg-type] + + op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.enabled.is_(None)).values(enabled=True)) + op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.auth_params.is_(None)).values(auth_params={})) + op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.metadata.is_(None)).values(metadata={})) + + db_dbcomment = sa.sql.table( + 'db_dbcomment', + sa.sql.column('dbnode_id', sa.Integer), + sa.sql.column('user_id', sa.Integer), + sa.Column('content', sa.Text), + sa.Column('ctime', sa.DateTime(timezone=True)), + sa.Column('mtime', sa.DateTime(timezone=True)), + sa.Column('uuid', sa.CHAR(32)), + ) + + # remove rows with null values, which may have previously resulted from deletion of a node or user + op.execute(db_dbcomment.delete().where(db_dbcomment.c.dbnode_id.is_(None))) # type: ignore[arg-type] + op.execute(db_dbcomment.delete().where(db_dbcomment.c.user_id.is_(None))) # type: ignore[arg-type] + + op.execute(db_dbcomment.update().where(db_dbcomment.c.content.is_(None)).values(content='')) + op.execute(db_dbcomment.update().where(db_dbcomment.c.ctime.is_(None)).values(ctime=timezone.now())) + op.execute(db_dbcomment.update().where(db_dbcomment.c.mtime.is_(None)).values(mtime=timezone.now())) + + db_dbcomputer = sa.sql.table( + 'db_dbcomputer', + sa.Column('description', sa.Text), + sa.Column('hostname', sa.String(255)), + sa.Column('metadata', sa.JSON()), + sa.Column('scheduler_type', sa.String(255)), + sa.Column('transport_type', sa.String(255)), + sa.Column('uuid', sa.CHAR(32)), + ) + + op.execute(db_dbcomputer.update().where(db_dbcomputer.c.description.is_(None)).values(description='')) + op.execute(db_dbcomputer.update().where(db_dbcomputer.c.hostname.is_(None)).values(hostname='')) + op.execute(db_dbcomputer.update().where(db_dbcomputer.c.metadata.is_(None)).values(metadata={})) + op.execute(db_dbcomputer.update().where(db_dbcomputer.c.scheduler_type.is_(None)).values(scheduler_type='')) + op.execute(db_dbcomputer.update().where(db_dbcomputer.c.transport_type.is_(None)).values(transport_type='')) + + db_dbgroup = 
sa.sql.table( + 'db_dbgroup', + sa.Column('description', sa.Text), + sa.Column('label', sa.String(255)), + sa.Column('time', sa.DateTime(timezone=True)), + sa.Column('type_string', sa.String(255)), + sa.Column('uuid', sa.CHAR(32)), + ) + + op.execute(db_dbgroup.update().where(db_dbgroup.c.description.is_(None)).values(description='')) + op.execute(db_dbgroup.update().where(db_dbgroup.c.time.is_(None)).values(time=timezone.now())) + op.execute(db_dbgroup.update().where(db_dbgroup.c.type_string.is_(None)).values(type_string='core')) + + db_dblog = sa.sql.table( + 'db_dblog', + sa.Column('levelname', sa.String(255)), + sa.Column('loggername', sa.String(255)), + sa.Column('message', sa.Text), + sa.Column('metadata', sa.JSON()), + sa.Column('time', sa.DateTime(timezone=True)), + sa.Column('uuid', sa.CHAR(32)), + ) + + op.execute(db_dblog.update().where(db_dblog.c.levelname.is_(None)).values(levelname='')) + op.execute(db_dblog.update().where(db_dblog.c.loggername.is_(None)).values(loggername='')) + op.execute(db_dblog.update().where(db_dblog.c.message.is_(None)).values(message='')) + op.execute(db_dblog.update().where(db_dblog.c.metadata.is_(None)).values(metadata={})) + op.execute(db_dblog.update().where(db_dblog.c.time.is_(None)).values(time=timezone.now())) + + db_dbnode = sa.sql.table( + 'db_dbnode', + sa.Column('ctime', sa.DateTime(timezone=True)), + sa.Column('description', sa.Text), + sa.Column('label', sa.String(255)), + sa.Column('mtime', sa.DateTime(timezone=True)), + sa.Column('node_type', sa.String(255)), + sa.Column('uuid', sa.CHAR(32)), + ) + + op.execute(db_dbnode.update().where(db_dbnode.c.ctime.is_(None)).values(ctime=timezone.now())) + op.execute(db_dbnode.update().where(db_dbnode.c.description.is_(None)).values(description='')) + op.execute(db_dbnode.update().where(db_dbnode.c.label.is_(None)).values(label='')) + op.execute(db_dbnode.update().where(db_dbnode.c.mtime.is_(None)).values(mtime=timezone.now())) + + db_dbuser = sa.sql.table( + 'db_dbuser', + sa.Column('email', sa.String(254)), + sa.Column('first_name', sa.String(254)), + sa.Column('last_name', sa.String(254)), + sa.Column('institution', sa.String(254)), + ) + + op.execute(db_dbuser.update().where(db_dbuser.c.first_name.is_(None)).values(first_name='')) + op.execute(db_dbuser.update().where(db_dbuser.c.last_name.is_(None)).values(last_name='')) + op.execute(db_dbuser.update().where(db_dbuser.c.institution.is_(None)).values(institution='')) + + +def downgrade(): + """Downgrade database schema.""" + raise NotImplementedError('Downgrade of main_0000a.') diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0000b_non_nullable.py b/aiida/storage/sqlite_zip/migrations/versions/main_0000b_non_nullable.py new file mode 100644 index 0000000000..69d0119c8e --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/versions/main_0000b_non_nullable.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,no-member +"""Alter columns to be non-nullable (to bring inline with psql_dos main_0001). 
+ +Revision ID: main_0000b +Revises: main_0000a +Create Date: 2022-03-04 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = 'main_0000b' +down_revision = 'main_0000a' +branch_labels = None +depends_on = None + + +def upgrade(): + """Upgrade database schema.""" + # see https://alembic.sqlalchemy.org/en/latest/batch.html#running-batch-migrations-for-sqlite-and-other-databases + # for why we run these in batches + with op.batch_alter_table('db_dbauthinfo') as batch_op: + batch_op.alter_column('aiidauser_id', existing_type=sa.INTEGER(), nullable=False) + batch_op.alter_column('dbcomputer_id', existing_type=sa.INTEGER(), nullable=False) + batch_op.alter_column('metadata', existing_type=sa.JSON(), nullable=False) + batch_op.alter_column('auth_params', existing_type=sa.JSON(), nullable=False) + batch_op.alter_column('enabled', existing_type=sa.BOOLEAN(), nullable=False) + + with op.batch_alter_table('db_dbcomment') as batch_op: + batch_op.alter_column('dbnode_id', existing_type=sa.INTEGER(), nullable=False) + batch_op.alter_column('user_id', existing_type=sa.INTEGER(), nullable=False) + batch_op.alter_column('content', existing_type=sa.TEXT(), nullable=False) + batch_op.alter_column('ctime', existing_type=sa.DateTime(timezone=True), nullable=False) + batch_op.alter_column('mtime', existing_type=sa.DateTime(timezone=True), nullable=False) + + with op.batch_alter_table('db_dbcomputer') as batch_op: + batch_op.alter_column('description', existing_type=sa.TEXT(), nullable=False) + batch_op.alter_column('hostname', existing_type=sa.String(255), nullable=False) + batch_op.alter_column('metadata', existing_type=sa.JSON(), nullable=False) + batch_op.alter_column('scheduler_type', existing_type=sa.String(255), nullable=False) + batch_op.alter_column('transport_type', existing_type=sa.String(255), nullable=False) + + with op.batch_alter_table('db_dbgroup') as batch_op: + batch_op.alter_column('description', existing_type=sa.TEXT(), nullable=False) + batch_op.alter_column('time', existing_type=sa.DateTime(timezone=True), nullable=False) + batch_op.alter_column('type_string', existing_type=sa.String(255), nullable=False) + + with op.batch_alter_table('db_dblog') as batch_op: + batch_op.alter_column('levelname', existing_type=sa.String(50), nullable=False) + batch_op.alter_column('loggername', existing_type=sa.String(255), nullable=False) + batch_op.alter_column('message', existing_type=sa.TEXT(), nullable=False) + batch_op.alter_column('time', existing_type=sa.DateTime(timezone=True), nullable=False) + batch_op.alter_column('metadata', existing_type=sa.JSON(), nullable=False) + + with op.batch_alter_table('db_dbnode') as batch_op: + batch_op.alter_column('ctime', existing_type=sa.DateTime(timezone=True), nullable=False) + batch_op.alter_column('description', existing_type=sa.TEXT(), nullable=False) + batch_op.alter_column('label', existing_type=sa.String(255), nullable=False) + batch_op.alter_column('mtime', existing_type=sa.DateTime(timezone=True), nullable=False) + + with op.batch_alter_table('db_dbuser') as batch_op: + batch_op.alter_column('first_name', existing_type=sa.String(254), nullable=False) + batch_op.alter_column('last_name', existing_type=sa.String(254), nullable=False) + batch_op.alter_column('institution', existing_type=sa.String(254), nullable=False) + + +def downgrade(): + """Downgrade database schema.""" + raise NotImplementedError('Downgrade of main_0000b.') diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0001.py 
b/aiida/storage/sqlite_zip/migrations/versions/main_0001.py new file mode 100644 index 0000000000..706fc1c25e --- /dev/null +++ b/aiida/storage/sqlite_zip/migrations/versions/main_0001.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,no-member +"""Bring schema inline with psql_dos main_0001 + +Revision ID: main_0001 +Revises: +Create Date: 2021-02-02 + +""" +revision = 'main_0001' +down_revision = 'main_0000b' +branch_labels = None +depends_on = None + + +def upgrade(): + """Migrations for the upgrade.""" + + +def downgrade(): + """Migrations for the downgrade.""" + raise NotImplementedError('Downgrade of main_0001.') diff --git a/aiida/storage/sqlite_zip/migrator.py b/aiida/storage/sqlite_zip/migrator.py new file mode 100644 index 0000000000..52cd81a91a --- /dev/null +++ b/aiida/storage/sqlite_zip/migrator.py @@ -0,0 +1,375 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Versioning and migration implementation for the sqlite_zip format.""" +import contextlib +from datetime import datetime +import os +from pathlib import Path +import shutil +import tarfile +import tempfile +from typing import Any, Dict, Iterator, List, Optional, Union +import zipfile + +from alembic.command import upgrade +from alembic.config import Config +from alembic.runtime.environment import EnvironmentContext +from alembic.runtime.migration import MigrationContext, MigrationInfo +from alembic.script import ScriptDirectory +from archive_path import ZipPath, extract_file_in_zip, open_file_in_tar, open_file_in_zip + +from aiida.common import json +from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, StorageMigrationError +from aiida.common.progress_reporter import get_progress_reporter +from aiida.storage.log import MIGRATE_LOGGER + +from .migrations.legacy import FINAL_LEGACY_VERSION, LEGACY_MIGRATE_FUNCTIONS +from .migrations.legacy_to_main import LEGACY_TO_MAIN_REVISION, perform_v1_migration +from .migrations.utils import copy_tar_to_zip, copy_zip_to_zip, update_metadata +from .utils import DB_FILENAME, META_FILENAME, REPO_FOLDER, create_sqla_engine, extract_metadata, read_version + + +def get_schema_version_head() -> str: + """Return the head schema version for this storage, i.e. 
the latest schema this storage can be migrated to.""" + return _alembic_script().revision_map.get_current_head('main') or '' + + +def list_versions() -> List[str]: + """Return all available schema versions (oldest to latest).""" + legacy_versions = list(LEGACY_MIGRATE_FUNCTIONS) + [FINAL_LEGACY_VERSION] + alembic_versions = [entry.revision for entry in reversed(list(_alembic_script().walk_revisions()))] + return legacy_versions + alembic_versions + + +def validate_storage(inpath: Path) -> None: + """Validate that the storage is at the head version. + + :raises: :class:`aiida.common.exceptions.UnreachableStorage` if the file does not exist + :raises: :class:`aiida.common.exceptions.CorruptStorage` + if the version cannot be read from the storage. + :raises: :class:`aiida.common.exceptions.IncompatibleStorageSchema` + if the storage is not compatible with the code API. + """ + schema_version_code = get_schema_version_head() + schema_version_archive = read_version(inpath) + if schema_version_archive != schema_version_code: + raise IncompatibleStorageSchema( + f'Archive schema version `{schema_version_archive}` ' + f'is incompatible with the required schema version `{schema_version_code}`. ' + 'To migrate the archive schema version to the current one, ' + f'run the following command: verdi archive migrate {str(inpath)!r}' + ) + + +def migrate( # pylint: disable=too-many-branches,too-many-statements,too-many-locals + inpath: Union[str, Path], + outpath: Union[str, Path], + version: str, + *, + force: bool = False, + compression: int = 6 +) -> None: + """Migrate an `sqlite_zip` storage file to a specific version. + + Historically, this format could be a zip or a tar file, + contained the database as a bespoke JSON format, and the repository files in the "legacy" per-node format. + For these versions, we first migrate the JSON database to the final legacy schema, + then we convert this file to the SQLite database, whilst sequentially migrating the repository files. + + Once any legacy migrations have been performed, we can then migrate the SQLite database to the final schema, + using alembic. + + Note that, to minimise disk space usage, we never fully extract/uncompress the input file + (except when migrating from a legacy tar file, whereby we cannot extract individual files): + + 1. The sqlite database is extracted to a temporary location and migrated + 2. A new zip file is opened, within a temporary folder + 3. The repository files are "streamed" directly between the input file and the new zip file + 4. The sqlite database and metadata JSON are written to the new zip file + 5. The new zip file is closed (which writes its final central directory) + 6. 
The new zip file is moved to the output location, removing any existing file if `force=True` + + :param path: Path to the file + :param outpath: Path to output the migrated file + :param version: Target version + :param force: If True, overwrite the output file if it exists + :param compression: Compression level for the output file + """ + inpath = Path(inpath) + outpath = Path(outpath) + + # halt immediately, if we could not write to the output file + if outpath.exists() and not force: + raise StorageMigrationError('Output path already exists and force=False') + if outpath.exists() and not outpath.is_file(): + raise StorageMigrationError('Existing output path is not a file') + + # the file should be either a tar (legacy only) or zip file + if tarfile.is_tarfile(str(inpath)): + is_tar = True + elif zipfile.is_zipfile(str(inpath)): + is_tar = False + else: + raise CorruptStorage(f'The input file is neither a tar nor a zip file: {inpath}') + + # read the metadata.json which should always be present + metadata = extract_metadata(inpath, search_limit=None) + + # obtain the current version from the metadata + if 'export_version' not in metadata: + raise CorruptStorage('No export_version found in metadata') + current_version = metadata['export_version'] + # update the modified time of the file and the compression + metadata['mtime'] = datetime.now().isoformat() + metadata['compression'] = compression + + # check versions are valid + # versions 0.1, 0.2, 0.3 are no longer supported, + # since 0.3 -> 0.4 requires costly migrations of repo files (you would need to unpack all of them) + if current_version in ('0.1', '0.2', '0.3') or version in ('0.1', '0.2', '0.3'): + raise StorageMigrationError( + f"Legacy migration from '{current_version}' -> '{version}' is not supported in aiida-core v2. " + 'First migrate them to the latest version in aiida-core v1.' + ) + all_versions = list_versions() + if current_version not in all_versions: + raise StorageMigrationError(f"Unknown current version '{current_version}'") + if version not in all_versions: + raise StorageMigrationError(f"Unknown target version '{version}'") + + # if we are already at the desired version, then no migration is required, so simply copy the file if necessary + if current_version == version: + if inpath != outpath: + if outpath.exists() and force: + outpath.unlink() + shutil.copyfile(inpath, outpath) + return + + # if the archive is a "legacy" format, i.e. 
has a data.json file, migrate it to the target/final legacy schema + data: Optional[Dict[str, Any]] = None + if current_version in LEGACY_MIGRATE_FUNCTIONS: + MIGRATE_LOGGER.report(f'Legacy migrations required from {"tar" if is_tar else "zip"} format') + MIGRATE_LOGGER.report('Extracting data.json ...') + # read the data.json file + data = _read_json(inpath, 'data.json', is_tar) + to_version = FINAL_LEGACY_VERSION if version not in LEGACY_MIGRATE_FUNCTIONS else version + current_version = _perform_legacy_migrations(current_version, to_version, metadata, data) + + # if we are now at the target version, then write the updated files to a new zip file and exit + if current_version == version: + # create new legacy archive with updated metadata & data + def path_callback(inpath, outpath) -> bool: + if inpath.name == 'metadata.json': + outpath.write_text(json.dumps(metadata)) + return True + if inpath.name == 'data.json': + outpath.write_text(json.dumps(data)) + return True + return False + + func = copy_tar_to_zip if is_tar else copy_zip_to_zip + + func( + inpath, + outpath, + path_callback, + overwrite=force, + compression=compression, + title='Writing migrated legacy archive', + info_order=('metadata.json', 'data.json') + ) + return + + # open the temporary directory, to perform further migrations + with tempfile.TemporaryDirectory() as tmpdirname: + + # open the new zip file, within which to write the migrated content + new_zip_path = Path(tmpdirname) / 'new.zip' + central_dir: Dict[str, Any] = {} + with ZipPath( + new_zip_path, + mode='w', + compresslevel=compression, + name_to_info=central_dir, + # this ensures that the metadata and database files are written above the repository files, + # in in the central directory, so that they can be accessed easily + info_order=(META_FILENAME, DB_FILENAME) + ) as new_zip: + + written_repo = False + if current_version == FINAL_LEGACY_VERSION: + # migrate from the legacy format, + # streaming the repository files directly to the new zip file + MIGRATE_LOGGER.report( + f'legacy {FINAL_LEGACY_VERSION!r} -> {LEGACY_TO_MAIN_REVISION!r} conversion required' + ) + if data is None: + MIGRATE_LOGGER.report('Extracting data.json ...') + data = _read_json(inpath, 'data.json', is_tar) + db_path = perform_v1_migration(inpath, Path(tmpdirname), new_zip, central_dir, is_tar, metadata, data) + # the migration includes adding the repository files to the new zip file + written_repo = True + current_version = LEGACY_TO_MAIN_REVISION + else: + if is_tar: + raise CorruptStorage('Tar files are not supported for this format') + # extract the sqlite database, for alembic migrations + db_path = Path(tmpdirname) / DB_FILENAME + with db_path.open('wb') as handle: + try: + extract_file_in_zip(inpath, DB_FILENAME, handle) + except Exception as exc: + raise CorruptStorage(f'database could not be read: {exc}') from exc + + # perform alembic migrations + # note, we do this before writing the repository files (unless a legacy migration), + # so that we don't waste time doing that (which could be slow), only for alembic to fail + if current_version != version: + MIGRATE_LOGGER.report('Performing SQLite migrations:') + with _migration_context(db_path) as context: + assert context.script is not None + context.stamp(context.script, current_version) + context.connection.commit() # type: ignore + # see https://alembic.sqlalchemy.org/en/latest/batch.html#dealing-with-referencing-foreign-keys + # for why we do not enforce foreign keys here + with _alembic_connect(db_path, 
enforce_foreign_keys=False) as config: + upgrade(config, version) + update_metadata(metadata, version) + + if not written_repo: + # stream the repository files directly to the new zip file + with ZipPath(inpath, mode='r') as old_zip: + length = sum(1 for _ in old_zip.glob('**/*', include_virtual=False)) + title = 'Copying repository files' + with get_progress_reporter()(desc=title, total=length) as progress: + for subpath in old_zip.glob('**/*', include_virtual=False): + new_path_sub = new_zip.joinpath(subpath.at) + if subpath.parts[0] == REPO_FOLDER: + if subpath.is_dir(): + new_path_sub.mkdir(exist_ok=True) + else: + new_path_sub.putfile(subpath) + progress.update() + + MIGRATE_LOGGER.report('Finalising the migration ...') + + # write the final database file to the new zip file + with db_path.open('rb') as handle: + with (new_zip / DB_FILENAME).open(mode='wb') as handle2: + shutil.copyfileobj(handle, handle2) + + # write the final metadata.json file to the new zip file + (new_zip / META_FILENAME).write_text(json.dumps(metadata)) + + # on exiting the the ZipPath context, the zip file is closed and the central directory written + + # move the new zip file to the final location + if outpath.exists() and force: + outpath.unlink() + shutil.move(new_zip_path, outpath) # type: ignore[arg-type] + + +def _read_json(inpath: Path, filename: str, is_tar: bool) -> Dict[str, Any]: + """Read a JSON file from the archive.""" + if is_tar: + with open_file_in_tar(inpath, filename) as handle: + data = json.load(handle) + else: + with open_file_in_zip(inpath, filename) as handle: + data = json.load(handle) + return data + + +def _perform_legacy_migrations(current_version: str, to_version: str, metadata: dict, data: dict) -> str: + """Perform legacy migrations from the current version to the desired version. + + Legacy archives use the old ``data.json`` format for storing the database. + These migrations simply manipulate the metadata and data in-place. 
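From the way LEGACY_MIGRATE_FUNCTIONS is used here, each entry maps a version string to a (next_version, migration_function) pair, and each function mutates metadata and data in place. A toy registry with one purely hypothetical step illustrates that shape:

# Toy registry illustrating the inferred shape of LEGACY_MIGRATE_FUNCTIONS;
# the version numbers and the key rename performed below are hypothetical.
from typing import Callable, Dict, Tuple


def _toy_migrate_12_to_13(metadata: dict, data: dict) -> None:
    """Hypothetical in-place step: move a key and bump the recorded version."""
    data['node_extras'] = data.pop('extras', {})
    metadata['export_version'] = '0.13'


TOY_MIGRATE_FUNCTIONS: Dict[str, Tuple[str, Callable[[dict, dict], None]]] = {
    '0.12': ('0.13', _toy_migrate_12_to_13),
}

metadata, data = {'export_version': '0.12'}, {'extras': {'a': 1}}
next_version, func = TOY_MIGRATE_FUNCTIONS['0.12']
func(metadata, data)
assert metadata['export_version'] == next_version and 'node_extras' in data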
+ + :param current_version: current version of the archive + :param to_version: version to migrate to + :param metadata: the metadata to migrate + :param data: the data to migrate + :return: the new version of the archive + """ + # compute the migration pathway + prev_version = current_version + pathway: List[str] = [] + while prev_version != to_version: + if prev_version not in LEGACY_MIGRATE_FUNCTIONS: + raise StorageMigrationError(f"No migration pathway available for '{current_version}' to '{to_version}'") + if prev_version in pathway: + raise StorageMigrationError( + f'cyclic migration pathway encountered: {" -> ".join(pathway + [prev_version])}' + ) + pathway.append(prev_version) + prev_version = LEGACY_MIGRATE_FUNCTIONS[prev_version][0] + + if not pathway: + MIGRATE_LOGGER.report('No migration required') + return to_version + + MIGRATE_LOGGER.report('Legacy migration pathway: %s', ' -> '.join(pathway + [to_version])) + + with get_progress_reporter()(total=len(pathway), desc='Performing migrations: ') as progress: + for from_version in pathway: + to_version = LEGACY_MIGRATE_FUNCTIONS[from_version][0] + progress.set_description_str(f'Performing migrations: {from_version} -> {to_version}', refresh=True) + LEGACY_MIGRATE_FUNCTIONS[from_version][1](metadata, data) + progress.update() + + return to_version + + +def _alembic_config() -> Config: + """Return an instance of an Alembic `Config`.""" + config = Config() + config.set_main_option('script_location', str(Path(os.path.realpath(__file__)).parent / 'migrations')) + return config + + +def _alembic_script() -> ScriptDirectory: + """Return an instance of an Alembic `ScriptDirectory`.""" + return ScriptDirectory.from_config(_alembic_config()) + + +@contextlib.contextmanager +def _alembic_connect(db_path: Path, enforce_foreign_keys=True) -> Iterator[Config]: + """Context manager to return an instance of an Alembic configuration. + + The profiles's database connection is added in the `attributes` property, through which it can then also be + retrieved, also in the `env.py` file, which is run when the database is migrated. + """ + with create_sqla_engine(db_path, enforce_foreign_keys=enforce_foreign_keys).connect() as connection: + config = _alembic_config() + config.attributes['connection'] = connection # pylint: disable=unsupported-assignment-operation + + def _callback(step: MigrationInfo, **kwargs): # pylint: disable=unused-argument + """Callback to be called after a migration step is executed.""" + from_rev = step.down_revision_ids[0] if step.down_revision_ids else '' + MIGRATE_LOGGER.report(f'- {from_rev} -> {step.up_revision_id}') + + config.attributes['on_version_apply'] = _callback # pylint: disable=unsupported-assignment-operation + + yield config + + +@contextlib.contextmanager +def _migration_context(db_path: Path) -> Iterator[MigrationContext]: + """Context manager to return an instance of an Alembic migration context. + + This migration context will have been configured with the current database connection, which allows this context + to be used to inspect the contents of the database, such as the current revision. 
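The same inspection can be done outside this context manager with plain Alembic, for example to check which revision an extracted db.sqlite3 has been stamped with; the path below is a placeholder.

# Sketch: read the revision stamped on an extracted sqlite database.
# '/tmp/db.sqlite3' is a placeholder; create_sqla_engine is the helper from `.utils`.
from alembic.runtime.migration import MigrationContext

from aiida.storage.sqlite_zip.utils import create_sqla_engine

engine = create_sqla_engine('/tmp/db.sqlite3')
with engine.connect() as connection:
    context = MigrationContext.configure(connection)
    print(context.get_current_revision())  # e.g. 'main_0001', or None if never stamped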
+ """ + with _alembic_connect(db_path) as config: + script = ScriptDirectory.from_config(config) + with EnvironmentContext(config, script) as context: + context.configure(context.config.attributes['connection']) + yield context.get_context() diff --git a/aiida/storage/sqlite_zip/models.py b/aiida/storage/sqlite_zip/models.py new file mode 100644 index 0000000000..7e637e4bb1 --- /dev/null +++ b/aiida/storage/sqlite_zip/models.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""This module contains the SQLAlchemy models for the SQLite backend. + +These models are intended to be identical to those of the `psql_dos` backend, +except for changes to the database specific types: + +- UUID -> CHAR(32) +- DateTime -> TZDateTime +- JSONB -> JSON + +Also, `varchar_pattern_ops` indexes are not possible in sqlite. +""" +from datetime import datetime +import functools +from typing import Any, Optional, Set, Tuple + +import pytz +import sqlalchemy as sa +from sqlalchemy import orm as sa_orm +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.dialects.sqlite import JSON + +from aiida.orm.entities import EntityTypes +# we need to import all models, to ensure they are loaded on the SQLA Metadata +from aiida.storage.psql_dos.models import authinfo, base, comment, computer, group, log, node, user + + +class SqliteModel: + """Represent a row in an sqlite database table""" + + def __repr__(self) -> str: + """Return a representation of the row columns""" + string = f'<{self.__class__.__name__}' + for col in self.__table__.columns: # type: ignore[attr-defined] # pylint: disable=no-member + # don't include columns with potentially large values + if isinstance(col.type, (JSON, sa.Text)): + continue + string += f' {col.name}={getattr(self, col.name)}' + return string + '>' + + +class TZDateTime(sa.TypeDecorator): # pylint: disable=abstract-method + """A timezone naive UTC ``DateTime`` implementation for SQLite. 
+ + see: https://docs.sqlalchemy.org/en/14/core/custom_types.html#store-timezone-aware-timestamps-as-timezone-naive-utc + """ + impl = sa.DateTime + cache_ok = True + + def process_bind_param(self, value: Optional[datetime], dialect): + """Process before writing to database.""" + if value is None: + return value + if value.tzinfo is None: + value = value.astimezone(pytz.utc) + value = value.astimezone(pytz.utc).replace(tzinfo=None) + return value + + def process_result_value(self, value: Optional[datetime], dialect): + """Process when returning from database.""" + if value is None: + return value + if value.tzinfo is None: + return value.replace(tzinfo=pytz.utc) + return value.astimezone(pytz.utc) + + +SqliteBase = sa.orm.declarative_base( + cls=SqliteModel, name='SqliteModel', metadata=sa.MetaData(naming_convention=dict(base.naming_convention)) +) + + +def pg_to_sqlite(pg_table: sa.Table): + """Convert a model intended for PostGreSQL to one compatible with SQLite""" + new = pg_table.to_metadata(SqliteBase.metadata) + for column in new.columns: + if isinstance(column.type, UUID): + column.type = sa.String(32) + elif isinstance(column.type, sa.DateTime): + column.type = TZDateTime() + elif isinstance(column.type, JSONB): + column.type = JSON() + # remove any postgresql specific indexes, e.g. varchar_pattern_ops + new.indexes.difference_update([idx for idx in new.indexes if idx.dialect_kwargs]) + return new + + +def create_orm_cls(klass: base.Base) -> SqliteBase: + """Create an ORM class from an existing table in the declarative meta""" + tbl = SqliteBase.metadata.tables[klass.__tablename__] + return type( # type: ignore[return-value] + klass.__name__, + (SqliteBase,), + { + '__tablename__': tbl.name, + '__table__': tbl, + **{col.name if col.name != 'metadata' else '_metadata': col for col in tbl.columns}, + }, + ) + + +for table in base.Base.metadata.sorted_tables: + pg_to_sqlite(table) + +DbUser = create_orm_cls(user.DbUser) +DbComputer = create_orm_cls(computer.DbComputer) +DbAuthInfo = create_orm_cls(authinfo.DbAuthInfo) +DbGroup = create_orm_cls(group.DbGroup) +DbNode = create_orm_cls(node.DbNode) +DbGroupNodes = create_orm_cls(group.DbGroupNode) +DbComment = create_orm_cls(comment.DbComment) +DbLog = create_orm_cls(log.DbLog) +DbLink = create_orm_cls(node.DbLink) + +# to-do ideally these relationships should be auto-generated in `create_orm_cls`, but this proved difficult +DbAuthInfo.aiidauser = sa_orm.relationship( # type: ignore[attr-defined] + 'DbUser', backref=sa_orm.backref('authinfos', passive_deletes=True, cascade='all, delete') +) +DbAuthInfo.dbcomputer = sa_orm.relationship( # type: ignore[attr-defined] + 'DbComputer', backref=sa_orm.backref('authinfos', passive_deletes=True, cascade='all, delete') +) +DbComment.dbnode = sa_orm.relationship('DbNode', backref='dbcomments') # type: ignore[attr-defined] +DbComment.user = sa_orm.relationship('DbUser') # type: ignore[attr-defined] +DbGroup.user = sa_orm.relationship( # type: ignore[attr-defined] + 'DbUser', backref=sa_orm.backref('dbgroups', cascade='merge') +) +DbGroup.dbnodes = sa_orm.relationship( # type: ignore[attr-defined] + 'DbNode', secondary='db_dbgroup_dbnodes', backref='dbgroups', lazy='dynamic' +) +DbLog.dbnode = sa_orm.relationship( # type: ignore[attr-defined] + 'DbNode', backref=sa_orm.backref('dblogs', passive_deletes='all', cascade='merge') +) +DbNode.dbcomputer = sa_orm.relationship( # type: ignore[attr-defined] + 'DbComputer', backref=sa_orm.backref('dbnodes', passive_deletes='all', cascade='merge') +) 
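A round-trip sketch for the TZDateTime decorator defined above: timezone-aware datetimes are stored as naive UTC and come back as aware UTC datetimes; the table name is illustrative.

# Round-trip sketch for TZDateTime; 'example_events' is an illustrative table.
from datetime import datetime

import pytz
import sqlalchemy as sa

from aiida.storage.sqlite_zip.models import TZDateTime

metadata = sa.MetaData()
events = sa.Table(
    'example_events', metadata,
    sa.Column('id', sa.Integer, primary_key=True),
    sa.Column('stamp', TZDateTime()),
)

engine = sa.create_engine('sqlite://', future=True)
metadata.create_all(engine)

aware = pytz.timezone('Europe/Zurich').localize(datetime(2022, 3, 4, 12, 0))
with engine.begin() as connection:
    connection.execute(events.insert().values(id=1, stamp=aware))

with engine.connect() as connection:
    stored = connection.execute(sa.select(events.c.stamp)).scalar_one()

assert stored.tzinfo is not None  # returned timezone-aware (UTC)
assert stored == aware            # same instant as the value written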
+DbNode.user = sa_orm.relationship('DbUser', backref=sa_orm.backref( # type: ignore[attr-defined] + 'dbnodes', + passive_deletes='all', + cascade='merge', +)) + + +@functools.lru_cache(maxsize=10) +def get_model_from_entity(entity_type: EntityTypes) -> Tuple[Any, Set[str]]: + """Return the Sqlalchemy model and column names corresponding to the given entity.""" + model = { + EntityTypes.USER: DbUser, + EntityTypes.AUTHINFO: DbAuthInfo, + EntityTypes.GROUP: DbGroup, + EntityTypes.NODE: DbNode, + EntityTypes.COMMENT: DbComment, + EntityTypes.COMPUTER: DbComputer, + EntityTypes.LOG: DbLog, + EntityTypes.LINK: DbLink, + EntityTypes.GROUP_NODE: DbGroupNodes + }[entity_type] + mapper = sa.inspect(model).mapper + column_names = {col.name for col in mapper.c.values()} + return model, column_names diff --git a/aiida/storage/sqlite_zip/utils.py b/aiida/storage/sqlite_zip/utils.py new file mode 100644 index 0000000000..cd2838314e --- /dev/null +++ b/aiida/storage/sqlite_zip/utils.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Utilities for this backend.""" +from pathlib import Path +import tarfile +from typing import Any, Dict, Optional, Union +import zipfile + +from archive_path import read_file_in_tar, read_file_in_zip +from sqlalchemy import event +from sqlalchemy.future.engine import Engine, create_engine + +from aiida.common import json +from aiida.common.exceptions import CorruptStorage, UnreachableStorage + +META_FILENAME = 'metadata.json' +"""The filename containing meta information about the storage instance.""" + +DB_FILENAME = 'db.sqlite3' +"""The filename of the SQLite database.""" + +REPO_FOLDER = 'repo' +"""The name of the folder containing the repository files.""" + + +def sqlite_enforce_foreign_keys(dbapi_connection, _): + """Enforce foreign key constraints, when using sqlite backend (off by default)""" + cursor = dbapi_connection.cursor() + cursor.execute('PRAGMA foreign_keys=ON;') + cursor.close() + + +def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine: + """Create a new engine instance.""" + engine = create_engine( + f'sqlite:///{path}', + json_serializer=json.dumps, + json_deserializer=json.loads, + encoding='utf-8', + future=True, + **kwargs + ) + if enforce_foreign_keys: + event.listen(engine, 'connect', sqlite_enforce_foreign_keys) + return engine + + +def extract_metadata(path: Union[str, Path], *, search_limit: Optional[int] = 10) -> Dict[str, Any]: + """Extract the metadata dictionary from the archive. + + :param search_limit: the maximum number of records to search for the metadata file in a zip file. 
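A usage sketch for this helper and for read_version defined just below; the archive path is a placeholder.

# Usage sketch; '/tmp/export.aiida' is a placeholder path.
from aiida.common.exceptions import CorruptStorage, UnreachableStorage
from aiida.storage.sqlite_zip.utils import extract_metadata, read_version

archive = '/tmp/export.aiida'
try:
    version = read_version(archive)       # works for both legacy and current archives
    metadata = extract_metadata(archive)  # the full metadata.json as a dictionary
except (UnreachableStorage, CorruptStorage) as exc:
    print(f'not a readable archive: {exc}')
else:
    print(version, metadata.get('aiida_version'))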
+ """ + path = Path(path) + if not path.exists(): + raise UnreachableStorage(f'path not found: {path}') + + if path.is_dir(): + if not path.joinpath(META_FILENAME).is_file(): + raise CorruptStorage('Could not find metadata file') + try: + metadata = json.loads(path.joinpath(META_FILENAME).read_text(encoding='utf8')) + except Exception as exc: + raise CorruptStorage(f'Could not read metadata: {exc}') from exc + elif path.is_file() and zipfile.is_zipfile(path): + try: + metadata = json.loads(read_file_in_zip(path, META_FILENAME, search_limit=search_limit)) + except Exception as exc: + raise CorruptStorage(f'Could not read metadata: {exc}') from exc + elif path.is_file() and tarfile.is_tarfile(path): + try: + metadata = json.loads(read_file_in_tar(path, META_FILENAME)) + except Exception as exc: + raise CorruptStorage(f'Could not read metadata: {exc}') from exc + else: + raise CorruptStorage('Path not a folder, zip or tar file') + + if not isinstance(metadata, dict): + raise CorruptStorage(f'Metadata is not a dictionary: {type(metadata)}') + + return metadata + + +def read_version(path: Union[str, Path], *, search_limit: Optional[int] = None) -> str: + """Read the version of the storage instance from the path. + + This is intended to work for all versions of the storage format. + + :param path: path to storage instance, either a folder, zip file or tar file. + :param search_limit: the maximum number of records to search for the metadata file in a zip file. + + :raises: ``UnreachableStorage`` if a version cannot be read from the file + """ + metadata = extract_metadata(path, search_limit=search_limit) + if 'export_version' in metadata: + return metadata['export_version'] + + raise CorruptStorage("Metadata does not contain 'export_version' key") diff --git a/aiida/tools/archive/__init__.py b/aiida/tools/archive/__init__.py index 4252c80745..735e4dc43d 100644 --- a/aiida/tools/archive/__init__.py +++ b/aiida/tools/archive/__init__.py @@ -17,7 +17,6 @@ # pylint: disable=wildcard-import from .abstract import * -from .common import * from .create import * from .exceptions import * from .implementations import * @@ -28,10 +27,8 @@ 'ArchiveFormatAbstract', 'ArchiveFormatSqlZip', 'ArchiveImportError', - 'ArchiveMigrationError', 'ArchiveReaderAbstract', 'ArchiveWriterAbstract', - 'CorruptArchive', 'EXPORT_LOGGER', 'ExportImportException', 'ExportValidationError', @@ -39,9 +36,6 @@ 'ImportTestRun', 'ImportUniquenessError', 'ImportValidationError', - 'IncompatibleArchiveVersionError', - 'MIGRATE_LOGGER', - 'MigrationValidationError', 'create_archive', 'get_format', 'import_archive', diff --git a/aiida/tools/archive/abstract.py b/aiida/tools/archive/abstract.py index b45eded9a6..08a5cb9ad8 100644 --- a/aiida/tools/archive/abstract.py +++ b/aiida/tools/archive/abstract.py @@ -141,7 +141,7 @@ def __exit__(self, *args, **kwargs) -> None: def get_metadata(self) -> Dict[str, Any]: """Return the top-level metadata. 
- :raises: ``UnreadableArchiveError`` if the top-level metadata cannot be read from the archive + :raises: ``CorruptStorage`` if the top-level metadata cannot be read from the archive """ @abstractmethod @@ -180,13 +180,8 @@ class ArchiveFormatAbstract(ABC): @property @abstractmethod - def versions(self) -> List[str]: - """Return ordered list of versions of the archive format, oldest -> latest.""" - - @property def latest_version(self) -> str: - """Return the latest version of the archive format.""" - return self.versions[-1] + """Return the latest schema version of the archive format.""" @property @abstractmethod @@ -201,8 +196,8 @@ def read_version(self, path: Union[str, Path]) -> str: :param path: archive path - :raises: ``FileNotFoundError`` if the file does not exist - :raises: ``UnreadableArchiveError`` if a version cannot be read from the archive + :raises: ``UnreachableStorage`` if the file does not exist + :raises: ``CorruptStorage`` if a version cannot be read from the archive """ @overload @@ -279,13 +274,13 @@ def migrate( """ -def get_format(name: str = 'sqlitezip') -> ArchiveFormatAbstract: +def get_format(name: str = 'sqlite_zip') -> ArchiveFormatAbstract: """Get the archive format instance. :param name: name of the archive format :return: archive format instance """ # to-do entry point for archive formats? - assert name == 'sqlitezip' - from aiida.tools.archive.implementations.sqlite.main import ArchiveFormatSqlZip + assert name == 'sqlite_zip' + from aiida.tools.archive.implementations.sqlite_zip.main import ArchiveFormatSqlZip return ArchiveFormatSqlZip() diff --git a/aiida/tools/archive/common.py b/aiida/tools/archive/common.py index a6bdce8094..0411dd2bcc 100644 --- a/aiida/tools/archive/common.py +++ b/aiida/tools/archive/common.py @@ -13,14 +13,9 @@ import urllib.parse import urllib.request -from aiida.common.log import AIIDA_LOGGER from aiida.orm import AuthInfo, Comment, Computer, Entity, Group, Log, Node, User from aiida.orm.entities import EntityTypes -__all__ = ('MIGRATE_LOGGER',) - -MIGRATE_LOGGER = AIIDA_LOGGER.getChild('migrate') - # Mapping from entity names to AiiDA classes entity_type_to_orm: Dict[EntityTypes, Type[Entity]] = { EntityTypes.AUTHINFO: AuthInfo, diff --git a/aiida/tools/archive/create.py b/aiida/tools/archive/create.py index edd60d5132..acb5a200fe 100644 --- a/aiida/tools/archive/create.py +++ b/aiida/tools/archive/create.py @@ -36,7 +36,7 @@ from .abstract import ArchiveFormatAbstract, ArchiveWriterAbstract from .common import batch_iter, entity_type_to_orm from .exceptions import ArchiveExportError, ExportValidationError -from .implementations.sqlite import ArchiveFormatSqlZip +from .implementations.sqlite_zip import ArchiveFormatSqlZip __all__ = ('create_archive', 'EXPORT_LOGGER') @@ -281,13 +281,12 @@ def create_archive( writer.update_metadata({ 'ctime': datetime.now().isoformat(), 'creation_parameters': { - 'entities_starting_set': + 'entities_starting_set': None if entities is None else {etype.value: list(unique) for etype, unique in starting_uuids.items() if unique}, 'include_authinfos': include_authinfos, 'include_comments': include_comments, 'include_logs': include_logs, 'graph_traversal_rules': full_traversal_rules, - 'entity_counts': dict(count_summary), # type: ignore } }) # stream entity data to the archive diff --git a/aiida/tools/archive/exceptions.py b/aiida/tools/archive/exceptions.py index 1ad358308f..05db839a36 100644 --- a/aiida/tools/archive/exceptions.py +++ b/aiida/tools/archive/exceptions.py @@ -19,11 +19,7 @@ 
'ExportImportException', 'ArchiveExportError', 'ExportValidationError', - 'CorruptArchive', - 'ArchiveMigrationError', - 'MigrationValidationError', 'ArchiveImportError', - 'IncompatibleArchiveVersionError', 'ImportValidationError', 'ImportUniquenessError', 'ImportTestRun', @@ -42,22 +38,10 @@ class ExportValidationError(ArchiveExportError): """Raised when validation fails during export, e.g. for non-sealed ``ProcessNode`` s.""" -class UnreadableArchiveError(ArchiveExportError): - """Raised when the version cannot be extracted from the archive.""" - - -class CorruptArchive(ExportImportException): - """Raised when an operation is applied to a corrupt export archive, e.g. missing files or invalid formats.""" - - class ArchiveImportError(ExportImportException): """Base class for all AiiDA import exceptions.""" -class IncompatibleArchiveVersionError(ExportImportException): - """Raised when trying to import an export archive with an incompatible schema version.""" - - class ImportUniquenessError(ArchiveImportError): """Raised when the user tries to violate a uniqueness constraint. @@ -71,25 +55,3 @@ class ImportValidationError(ArchiveImportError): class ImportTestRun(ArchiveImportError): """Raised during an import, before the transaction is commited.""" - - -class ArchiveMigrationError(ExportImportException): - """Base class for all AiiDA export archive migration exceptions.""" - - -class MigrationValidationError(ArchiveMigrationError): - """Raised when validation fails during migration of export archives.""" - - -class ReadOnlyError(IOError): - """Raised when a write operation is called on a read-only archive.""" - - def __init__(self, msg='Archive is read-only'): # pylint: disable=useless-super-delegation - super().__init__(msg) - - -class ArchiveClosedError(IOError): - """Raised when the archive is closed.""" - - def __init__(self, msg='Archive is closed'): # pylint: disable=useless-super-delegation - super().__init__(msg) diff --git a/aiida/tools/archive/implementations/__init__.py b/aiida/tools/archive/implementations/__init__.py index 6f85411389..fed227acb2 100644 --- a/aiida/tools/archive/implementations/__init__.py +++ b/aiida/tools/archive/implementations/__init__.py @@ -14,7 +14,7 @@ # yapf: disable # pylint: disable=wildcard-import -from .sqlite import * +from .sqlite_zip import * __all__ = ( 'ArchiveFormatSqlZip', diff --git a/aiida/tools/archive/implementations/sqlite/backend.py b/aiida/tools/archive/implementations/sqlite/backend.py deleted file mode 100644 index 934dd2bf1b..0000000000 --- a/aiida/tools/archive/implementations/sqlite/backend.py +++ /dev/null @@ -1,469 +0,0 @@ -# -*- coding: utf-8 -*- -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. 
# -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""The table models are dynamically generated from the sqlalchemy backend models.""" -from contextlib import contextmanager -from datetime import datetime -from functools import singledispatch -from pathlib import Path -import tempfile -from typing import BinaryIO, Iterable, Iterator, List, Optional, Sequence, Tuple, Type, cast -import zipfile -from zipfile import ZipFile - -from archive_path import extract_file_in_zip -import pytz -from sqlalchemy import CHAR, Text, orm, types -from sqlalchemy.dialects.postgresql import JSONB, UUID -from sqlalchemy.dialects.sqlite import JSON -from sqlalchemy.sql.schema import Table - -from aiida.common.exceptions import UnreachableStorage -from aiida.manage import Profile -from aiida.orm.entities import EntityTypes -from aiida.orm.implementation import StorageBackend -from aiida.repository.backend.abstract import AbstractRepositoryBackend -# we need to import all models, to ensure they are loaded on the SQLA Metadata -from aiida.storage.psql_dos.models import authinfo, base, comment, computer, group, log, node, user -from aiida.storage.psql_dos.orm import authinfos, comments, computers, entities, groups, logs, nodes, users -from aiida.storage.psql_dos.orm.querybuilder import SqlaQueryBuilder -from aiida.storage.psql_dos.orm.utils import ModelWrapper -from aiida.tools.archive.exceptions import ArchiveClosedError, CorruptArchive, ReadOnlyError - -from .common import DB_FILENAME, REPO_FOLDER, create_sqla_engine - - -class SqliteModel: - """Represent a row in an sqlite database table""" - - def __repr__(self) -> str: - """Return a representation of the row columns""" - string = f'<{self.__class__.__name__}' - for col in self.__table__.columns: # type: ignore[attr-defined] # pylint: disable=no-member - # don't include columns with potentially large values - if isinstance(col.type, (JSON, Text)): - continue - string += f' {col.name}={getattr(self, col.name)}' - return string + '>' - - -class TZDateTime(types.TypeDecorator): # pylint: disable=abstract-method - """A timezone naive UTC ``DateTime`` implementation for SQLite. 
- - see: https://docs.sqlalchemy.org/en/14/core/custom_types.html#store-timezone-aware-timestamps-as-timezone-naive-utc - """ - impl = types.DateTime - cache_ok = True - - def process_bind_param(self, value: Optional[datetime], dialect): - """Process before writing to database.""" - if value is None: - return value - if value.tzinfo is None: - value = value.astimezone(pytz.utc) - value = value.astimezone(pytz.utc).replace(tzinfo=None) - return value - - def process_result_value(self, value: Optional[datetime], dialect): - """Process when returning from database.""" - if value is None: - return value - if value.tzinfo is None: - return value.replace(tzinfo=pytz.utc) - return value.astimezone(pytz.utc) - - -ArchiveDbBase = orm.declarative_base(cls=SqliteModel, name='SqliteModel') - - -def pg_to_sqlite(pg_table: Table): - """Convert a model intended for PostGreSQL to one compatible with SQLite""" - new = pg_table.to_metadata(ArchiveDbBase.metadata) - for column in new.columns: - if isinstance(column.type, UUID): - column.type = CHAR(32) - elif isinstance(column.type, types.DateTime): - column.type = TZDateTime() - elif isinstance(column.type, JSONB): - column.type = JSON() - return new - - -def create_orm_cls(klass: base.Base) -> ArchiveDbBase: - """Create an ORM class from an existing table in the declarative meta""" - tbl = ArchiveDbBase.metadata.tables[klass.__tablename__] - return type( # type: ignore[return-value] - klass.__name__, - (ArchiveDbBase,), - { - '__tablename__': tbl.name, - '__table__': tbl, - **{col.name if col.name != 'metadata' else '_metadata': col for col in tbl.columns}, - }, - ) - - -for table in base.Base.metadata.sorted_tables: - pg_to_sqlite(table) - -DbUser = create_orm_cls(user.DbUser) -DbComputer = create_orm_cls(computer.DbComputer) -DbAuthInfo = create_orm_cls(authinfo.DbAuthInfo) -DbGroup = create_orm_cls(group.DbGroup) -DbNode = create_orm_cls(node.DbNode) -DbGroupNodes = create_orm_cls(group.DbGroupNode) -DbComment = create_orm_cls(comment.DbComment) -DbLog = create_orm_cls(log.DbLog) -DbLink = create_orm_cls(node.DbLink) - -# to-do This was the minimum for creating a graph, but really all relationships should be copied -DbNode.dbcomputer = orm.relationship('DbComputer', backref='dbnodes') # type: ignore[attr-defined] -DbGroup.dbnodes = orm.relationship( # type: ignore[attr-defined] - 'DbNode', secondary='db_dbgroup_dbnodes', backref='dbgroups', lazy='dynamic' -) - - -class ZipfileBackendRepository(AbstractRepositoryBackend): - """A read-only backend for an open zip file.""" - - def __init__(self, file: ZipFile): - self._zipfile = file - - @property - def zipfile(self) -> ZipFile: - if self._zipfile.fp is None: - raise ArchiveClosedError() - return self._zipfile - - @property - def uuid(self) -> Optional[str]: - return None - - @property - def key_format(self) -> Optional[str]: - return 'sha256' - - def initialise(self, **kwargs) -> None: - pass - - @property - def is_initialised(self) -> bool: - return True - - def erase(self) -> None: - raise ReadOnlyError() - - def _put_object_from_filelike(self, handle: BinaryIO) -> str: - raise ReadOnlyError() - - def has_object(self, key: str) -> bool: - try: - self.zipfile.getinfo(f'{REPO_FOLDER}/{key}') - except KeyError: - return False - return True - - def has_objects(self, keys: List[str]) -> List[bool]: - return [self.has_object(key) for key in keys] - - def list_objects(self) -> Iterable[str]: - for name in self.zipfile.namelist(): - if name.startswith(REPO_FOLDER + '/') and name[len(REPO_FOLDER) + 1:]: - yield 
name[len(REPO_FOLDER) + 1:] - - @contextmanager - def open(self, key: str) -> Iterator[BinaryIO]: - try: - handle = self.zipfile.open(f'{REPO_FOLDER}/{key}') - yield cast(BinaryIO, handle) - except KeyError: - raise FileNotFoundError(f'object with key `{key}` does not exist.') - finally: - handle.close() - - def iter_object_streams(self, keys: List[str]) -> Iterator[Tuple[str, BinaryIO]]: - for key in keys: - with self.open(key) as handle: # pylint: disable=not-context-manager - yield key, handle - - def delete_objects(self, keys: List[str]) -> None: - raise ReadOnlyError() - - def get_object_hash(self, key: str) -> str: - return key - - def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: - raise NotImplementedError - - def get_info(self, statistics: bool = False, **kwargs) -> dict: - return {'objects': {'count': len(list(self.list_objects()))}} - - -class ArchiveBackendQueryBuilder(SqlaQueryBuilder): - """Archive query builder""" - - @property - def Node(self): - return DbNode - - @property - def Link(self): - return DbLink - - @property - def Computer(self): - return DbComputer - - @property - def User(self): - return DbUser - - @property - def Group(self): - return DbGroup - - @property - def AuthInfo(self): - return DbAuthInfo - - @property - def Comment(self): - return DbComment - - @property - def Log(self): - return DbLog - - @property - def table_groups_nodes(self): - return DbGroupNodes.__table__ # type: ignore[attr-defined] # pylint: disable=no-member - - -class ArchiveReadOnlyBackend(StorageBackend): # pylint: disable=too-many-public-methods - """A read-only backend for the archive.""" - - @classmethod - def version_head(cls) -> str: - raise NotImplementedError - - @classmethod - def version_profile(cls, profile: Profile) -> None: - raise NotImplementedError - - @classmethod - def migrate(cls, profile: Profile): - raise ReadOnlyError() - - def __init__(self, profile: Profile): - super().__init__(profile) - self._path = Path(profile.storage_config['path']) - if not self._path.is_file(): - raise UnreachableStorage(f'archive file `{self._path}` does not exist.') - # lazy open the archive zipfile and extract the database file - self._db_file: Optional[Path] = None - self._session: Optional[orm.Session] = None - self._zipfile: Optional[zipfile.ZipFile] = None - self._closed = False - - def __str__(self) -> str: - state = 'closed' if self.is_closed else 'open' - return f'Aiida archive (read-only) [{state}] @ {self._path}' - - @property - def is_closed(self) -> bool: - return self._closed - - def close(self): - """Close the backend""" - if self._session: - self._session.close() - if self._db_file and self._db_file.exists(): - self._db_file.unlink() - if self._zipfile: - self._zipfile.close() - self._session = None - self._db_file = None - self._zipfile = None - self._closed = True - - def get_session(self) -> orm.Session: - """Return an SQLAlchemy session.""" - if self._closed: - raise ArchiveClosedError() - if self._db_file is None: - _, path = tempfile.mkstemp() - self._db_file = Path(path) - with self._db_file.open('wb') as handle: - try: - extract_file_in_zip(self._path, DB_FILENAME, handle, search_limit=4) - except Exception as exc: - raise CorruptArchive(f'database could not be read: {exc}') from exc - if self._session is None: - self._session = orm.Session(create_sqla_engine(self._db_file)) - return self._session - - def get_repository(self) -> ZipfileBackendRepository: - if self._closed: - raise ArchiveClosedError() - if self._zipfile is None: - 
self._zipfile = ZipFile(self._path, mode='r') # pylint: disable=consider-using-with - return ZipfileBackendRepository(self._zipfile) - - def query(self) -> ArchiveBackendQueryBuilder: - return ArchiveBackendQueryBuilder(self) - - def get_backend_entity(self, res): # pylint: disable=no-self-use - """Return the backend entity that corresponds to the given Model instance.""" - klass = get_backend_entity(res) - return klass(self, res) - - @property - def authinfos(self): - return create_backend_collection(authinfos.SqlaAuthInfoCollection, self, authinfos.SqlaAuthInfo, DbAuthInfo) - - @property - def comments(self): - return create_backend_collection(comments.SqlaCommentCollection, self, comments.SqlaComment, DbComment) - - @property - def computers(self): - return create_backend_collection(computers.SqlaComputerCollection, self, computers.SqlaComputer, DbComputer) - - @property - def groups(self): - return create_backend_collection(groups.SqlaGroupCollection, self, groups.SqlaGroup, DbGroup) - - @property - def logs(self): - return create_backend_collection(logs.SqlaLogCollection, self, logs.SqlaLog, DbLog) - - @property - def nodes(self): - return create_backend_collection(nodes.SqlaNodeCollection, self, nodes.SqlaNode, DbNode) - - @property - def users(self): - return create_backend_collection(users.SqlaUserCollection, self, users.SqlaUser, DbUser) - - def _clear(self, recreate_user: bool = True) -> None: - raise ReadOnlyError() - - def transaction(self): - raise ReadOnlyError() - - @property - def in_transaction(self) -> bool: - return False - - def bulk_insert(self, entity_type: EntityTypes, rows: List[dict], allow_defaults: bool = False) -> List[int]: - raise ReadOnlyError() - - def bulk_update(self, entity_type: EntityTypes, rows: List[dict]) -> None: - raise ReadOnlyError() - - def delete_nodes_and_connections(self, pks_to_delete: Sequence[int]): - raise ReadOnlyError() - - def get_global_variable(self, key: str): - raise NotImplementedError - - def set_global_variable(self, key: str, value, description: Optional[str] = None, overwrite=True) -> None: - raise ReadOnlyError() - - def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: - raise NotImplementedError - - def get_info(self, statistics: bool = False) -> dict: - results = super().get_info(statistics=statistics) - results['repository'] = self.get_repository().get_info(statistics) - return results - - -def create_backend_cls(base_class, model_cls): - """Create an archive backend class for the given model class.""" - - class ReadOnlyEntityBackend(base_class): # type: ignore - """Backend class for the read-only archive.""" - - MODEL_CLASS = model_cls - - def __init__(self, _backend, model): - """Initialise the backend entity.""" - self._backend = _backend - self._model = ModelWrapper(model, _backend) - - @property - def model(self) -> ModelWrapper: - """Return an ORM model that correctly updates and flushes the data model when getting or setting a field.""" - return self._model - - @property - def bare_model(self): - """Return the underlying SQLAlchemy ORM model for this entity.""" - return self.model._model # pylint: disable=protected-access - - @classmethod - def from_dbmodel(cls, model, _backend): - return cls(_backend, model) - - @property - def is_stored(self): - return True - - def store(self): # pylint: disable=no-self-use - return ReadOnlyError() - - return ReadOnlyEntityBackend - - -def create_backend_collection(cls, _backend, entity_cls, model): - collection = cls(_backend) - new_cls = 
create_backend_cls(entity_cls, model) - collection.ENTITY_CLASS = new_cls - return collection - - -@singledispatch -def get_backend_entity(dbmodel) -> Type[entities.SqlaModelEntity]: # pylint: disable=unused-argument - raise TypeError(f'Cannot get backend entity for {dbmodel}') - - -@get_backend_entity.register(DbAuthInfo) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(authinfos.SqlaAuthInfo, dbmodel.__class__) - - -@get_backend_entity.register(DbComment) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(comments.SqlaComment, dbmodel.__class__) - - -@get_backend_entity.register(DbComputer) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(computers.SqlaComputer, dbmodel.__class__) - - -@get_backend_entity.register(DbGroup) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(groups.SqlaGroup, dbmodel.__class__) - - -@get_backend_entity.register(DbLog) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(logs.SqlaLog, dbmodel.__class__) - - -@get_backend_entity.register(DbNode) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(nodes.SqlaNode, dbmodel.__class__) - - -@get_backend_entity.register(DbUser) # type: ignore[call-overload] -def _(dbmodel): - return create_backend_cls(users.SqlaUser, dbmodel.__class__) diff --git a/aiida/tools/archive/implementations/sqlite/migrations/main.py b/aiida/tools/archive/implementations/sqlite/migrations/main.py deleted file mode 100644 index b0a69ac0f9..0000000000 --- a/aiida/tools/archive/implementations/sqlite/migrations/main.py +++ /dev/null @@ -1,187 +0,0 @@ -# -*- coding: utf-8 -*- -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""AiiDA archive migrator implementation.""" -from pathlib import Path -import shutil -import tarfile -import tempfile -from typing import Any, Dict, List, Optional, Union -import zipfile - -from archive_path import open_file_in_tar, open_file_in_zip - -from aiida.common import json -from aiida.common.progress_reporter import get_progress_reporter -from aiida.tools.archive.common import MIGRATE_LOGGER -from aiida.tools.archive.exceptions import ArchiveMigrationError, CorruptArchive - -from ..common import copy_tar_to_zip, copy_zip_to_zip -from .legacy import FINAL_LEGACY_VERSION, LEGACY_MIGRATE_FUNCTIONS -from .legacy_to_new import perform_v1_migration - -ALL_VERSIONS = ['0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10', '0.11', '0.12', '1.0'] - - -def migrate( # pylint: disable=too-many-branches,too-many-statements - inpath: Union[str, Path], - outpath: Union[str, Path], - current_version: str, - version: str, - *, - force: bool = False, - compression: int = 6 -) -> None: - """Migrate an archive to a specific version. 
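For contrast with the legacy migrator being deleted here (which had to be told the archive's current version explicitly), the replacement lives in `aiida.storage.sqlite_zip.migrator` and is called later in this diff by `ArchiveFormatSqlZip.migrate` without a `current_version` argument. A rough sketch of the new entry point, with placeholder paths:

```python
# Sketch of the new migration entry point; input/output paths are placeholders.
from aiida.storage.sqlite_zip.migrator import get_schema_version_head, migrate

migrate(
    'old_export.aiida',          # existing archive at a supported legacy version
    'migrated_export.aiida',     # destination file
    get_schema_version_head(),   # migrate all the way to the latest schema version
    force=True,                  # overwrite the destination if it already exists
    compression=6,
)
```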
- - :param path: archive path - """ - inpath = Path(inpath) - outpath = Path(outpath) - - if outpath.exists() and not force: - raise IOError('Output path already exists and force=False') - if outpath.exists() and not outpath.is_file(): - raise IOError('Existing output path is not a file') - - # check versions are valid - # versions 0.1, 0.2, 0.3 are no longer supported, - # since 0.3 -> 0.4 requires costly migrations of repo files (you would need to unpack all of them) - if current_version in ('0.1', '0.2', '0.3') or version in ('0.1', '0.2', '0.3'): - raise ArchiveMigrationError( - f"Migration from '{current_version}' -> '{version}' is not supported in aiida-core v2" - ) - if current_version not in ALL_VERSIONS: - raise ArchiveMigrationError(f"Unknown current version '{current_version}'") - if version not in ALL_VERSIONS: - raise ArchiveMigrationError(f"Unknown target version '{version}'") - - # if we are already at the desired version, then no migration is required - if current_version == version: - if inpath != outpath: - if outpath.exists() and force: - outpath.unlink() - shutil.copyfile(inpath, outpath) - return - - # the file should be either a tar (legacy only) or zip file - if tarfile.is_tarfile(str(inpath)): - is_tar = True - elif zipfile.is_zipfile(str(inpath)): - is_tar = False - else: - raise CorruptArchive(f'The input file is neither a tar nor a zip file: {inpath}') - - # read the metadata.json which should always be present - metadata = _read_json(inpath, 'metadata.json', is_tar) - # data.json will only be read from legacy archives - data: Optional[Dict[str, Any]] = None - - # if the archive is a "legacy" format, i.e. has a data.json file, migrate to latest one - if current_version in LEGACY_MIGRATE_FUNCTIONS: - MIGRATE_LOGGER.report('Legacy migrations required') - MIGRATE_LOGGER.report('Extracting data.json ...') - # read the data.json file - data = _read_json(inpath, 'data.json', is_tar) - to_version = FINAL_LEGACY_VERSION if version not in LEGACY_MIGRATE_FUNCTIONS else version - current_version = _perform_legacy_migrations(current_version, to_version, metadata, data) - - if current_version == version: - # create new legacy archive with updated metadata & data - def path_callback(inpath, outpath) -> bool: - if inpath.name == 'metadata.json': - outpath.write_text(json.dumps(metadata)) - return True - if inpath.name == 'data.json': - outpath.write_text(json.dumps(data)) - return True - return False - - func = copy_tar_to_zip if is_tar else copy_zip_to_zip - - func( - inpath, - outpath, - path_callback, - overwrite=force, - compression=compression, - title='Writing migrated legacy archive', - info_order=('metadata.json', 'data.json') - ) - return - - with tempfile.TemporaryDirectory() as tmpdirname: - - if current_version == FINAL_LEGACY_VERSION: - MIGRATE_LOGGER.report('aiida-core v1 -> v2 migration required') - if data is None: - MIGRATE_LOGGER.report('Extracting data.json ...') - data = _read_json(inpath, 'data.json', is_tar) - current_version = perform_v1_migration( - inpath, Path(tmpdirname), 'new.zip', is_tar, metadata, data, compression - ) - - if not current_version == version: - raise ArchiveMigrationError(f"Migration from '{current_version}' -> '{version}' failed") - - if outpath.exists() and force: - outpath.unlink() - shutil.move(Path(tmpdirname) / 'new.zip', outpath) # type: ignore[arg-type] - - -def _read_json(inpath: Path, filename: str, is_tar: bool) -> Dict[str, Any]: - """Read a JSON file from the archive.""" - if is_tar: - with open_file_in_tar(inpath, 
filename) as handle: - data = json.load(handle) - else: - with open_file_in_zip(inpath, filename) as handle: - data = json.load(handle) - return data - - -def _perform_legacy_migrations(current_version: str, to_version: str, metadata: dict, data: dict) -> str: - """Perform legacy migrations from the current version to the desired version. - - Legacy archives use the old ``data.json`` format for storing the database. - These migrations simply manipulate the metadata and data in-place. - - :param current_version: current version of the archive - :param to_version: version to migrate to - :param metadata: the metadata to migrate - :param data: the data to migrate - :return: the new version of the archive - """ - # compute the migration pathway - prev_version = current_version - pathway: List[str] = [] - while prev_version != to_version: - if prev_version not in LEGACY_MIGRATE_FUNCTIONS: - raise ArchiveMigrationError(f"No migration pathway available for '{current_version}' to '{to_version}'") - if prev_version in pathway: - raise ArchiveMigrationError( - f'cyclic migration pathway encountered: {" -> ".join(pathway + [prev_version])}' - ) - pathway.append(prev_version) - prev_version = LEGACY_MIGRATE_FUNCTIONS[prev_version][0] - - if not pathway: - MIGRATE_LOGGER.report('No migration required') - return to_version - - MIGRATE_LOGGER.report('Legacy migration pathway: %s', ' -> '.join(pathway + [to_version])) - - with get_progress_reporter()(total=len(pathway), desc='Performing migrations: ') as progress: - for from_version in pathway: - to_version = LEGACY_MIGRATE_FUNCTIONS[from_version][0] - progress.set_description_str(f'Performing migrations: {from_version} -> {to_version}', refresh=True) - LEGACY_MIGRATE_FUNCTIONS[from_version][1](metadata, data) - progress.update() - - return to_version diff --git a/aiida/tools/archive/implementations/sqlite/migrations/utils.py b/aiida/tools/archive/implementations/sqlite/migrations/utils.py deleted file mode 100644 index e769de1bd4..0000000000 --- a/aiida/tools/archive/implementations/sqlite/migrations/utils.py +++ /dev/null @@ -1,75 +0,0 @@ -# -*- coding: utf-8 -*- -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""Utility functions for migration of export-files.""" - -from aiida.tools.archive import exceptions - - -def verify_metadata_version(metadata, version=None): - """Utility function to verify that the metadata has the correct version number. - - If no version number is passed, it will just extract the version number and return it. 
- - :param metadata: the content of an export archive metadata.json file - :param version: string version number that the metadata is expected to have - """ - try: - metadata_version = metadata['export_version'] - except KeyError: - raise exceptions.ArchiveMigrationError("metadata is missing the 'export_version' key") - - if version is None: - return metadata_version - - if metadata_version != version: - raise exceptions.MigrationValidationError( - f'expected archive file with version {version} but found version {metadata_version}' - ) - - return None - - -def update_metadata(metadata, version): - """Update the metadata with a new version number and a notification of the conversion that was executed. - - :param metadata: the content of an export archive metadata.json file - :param version: string version number that the updated metadata should get - """ - from aiida import get_version - - old_version = metadata['export_version'] - conversion_info = metadata.get('conversion_info', []) - - conversion_message = f'Converted from version {old_version} to {version} with AiiDA v{get_version()}' - conversion_info.append(conversion_message) - - metadata['aiida_version'] = get_version() - metadata['export_version'] = version - metadata['conversion_info'] = conversion_info - - -def remove_fields(metadata, data, entities, fields): - """Remove fields under entities from data.json and metadata.json. - - :param metadata: the content of an export archive metadata.json file - :param data: the content of an export archive data.json file - :param entities: list of ORM entities - :param fields: list of fields to be removed from the export archive files - """ - # data.json - for entity in entities: - for content in data['export_data'].get(entity, {}).values(): - for field in fields: - content.pop(field, None) - - # metadata.json - for entity in entities: - for field in fields: - metadata['all_fields_info'][entity].pop(field, None) diff --git a/aiida/tools/archive/implementations/sqlite/migrations/v1_db_schema.py b/aiida/tools/archive/implementations/sqlite/migrations/v1_db_schema.py deleted file mode 100644 index 30baf8709f..0000000000 --- a/aiida/tools/archive/implementations/sqlite/migrations/v1_db_schema.py +++ /dev/null @@ -1,169 +0,0 @@ -# -*- coding: utf-8 -*- -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""This is the sqlite DB schema, coresponding to the 34a831f4286d main DB revision. - -For normal operation of the archive, -we auto-generate the schema from the models in ``aiida.storage.psql_dos.models``. -However, when migrating an archive from the old format, we require a fixed revision of the schema. - -The only difference between the PostGreSQL schema and SQLite one, -is the replacement of ``JSONB`` with ``JSON``, and ``UUID`` with ``CHAR(36)``. 
-""" -from sqlalchemy import ForeignKey, orm -from sqlalchemy.dialects.sqlite import JSON -from sqlalchemy.schema import Column, Index, UniqueConstraint -from sqlalchemy.types import CHAR, Boolean, DateTime, Integer, String, Text - -ArchiveV1Base = orm.declarative_base() - - -class DbAuthInfo(ArchiveV1Base): - """Class that keeps the authernification data.""" - - __tablename__ = 'db_dbauthinfo' - __table_args__ = (UniqueConstraint('aiidauser_id', 'dbcomputer_id'),) - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - aiidauser_id = Column( - Integer, ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED') - ) - dbcomputer_id = Column( - Integer, ForeignKey('db_dbcomputer.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED') - ) - _metadata = Column('metadata', JSON) - auth_params = Column(JSON) - enabled = Column(Boolean, default=True) - - -class DbComment(ArchiveV1Base): - """Class to store comments.""" - - __tablename__ = 'db_dbcomment' - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - uuid = Column(CHAR(36), unique=True) - dbnode_id = Column(Integer, ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED')) - ctime = Column(DateTime(timezone=True)) - mtime = Column(DateTime(timezone=True)) - user_id = Column(Integer, ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED')) - content = Column(Text, nullable=True) - - -class DbComputer(ArchiveV1Base): - """Class to store computers.""" - __tablename__ = 'db_dbcomputer' - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - uuid = Column(CHAR(36), unique=True) - label = Column(String(255), unique=True, nullable=False) - hostname = Column(String(255)) - description = Column(Text, nullable=True) - scheduler_type = Column(String(255)) - transport_type = Column(String(255)) - _metadata = Column('metadata', JSON) - - -class DbGroupNodes(ArchiveV1Base): - """Class to store join table for group -> nodes.""" - - __tablename__ = 'db_dbgroup_dbnodes' - __table_args__ = (UniqueConstraint('dbgroup_id', 'dbnode_id', name='db_dbgroup_dbnodes_dbgroup_id_dbnode_id_key'),) - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - dbnode_id = Column(Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED')) - dbgroup_id = Column(Integer, ForeignKey('db_dbgroup.id', deferrable=True, initially='DEFERRED')) - - -class DbGroup(ArchiveV1Base): - """Class to store groups.""" - - __tablename__ = 'db_dbgroup' - __table_args__ = (UniqueConstraint('label', 'type_string'),) - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - uuid = Column(CHAR(36), unique=True) - label = Column(String(255), index=True) - type_string = Column(String(255), default='', index=True) - time = Column(DateTime(timezone=True)) - description = Column(Text, nullable=True) - extras = Column(JSON, default=dict, nullable=False) - user_id = Column(Integer, ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED')) - - Index('db_dbgroup_dbnodes_dbnode_id_idx', DbGroupNodes.dbnode_id) - Index('db_dbgroup_dbnodes_dbgroup_id_idx', DbGroupNodes.dbgroup_id) - - -class DbLog(ArchiveV1Base): - """Class to store logs.""" - - __tablename__ = 'db_dblog' - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - uuid = Column(CHAR(36), unique=True) - time = Column(DateTime(timezone=True)) - loggername = Column(String(255), index=True) - 
levelname = Column(String(255), index=True) - dbnode_id = Column( - Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED', ondelete='CASCADE'), nullable=False - ) - message = Column(Text(), nullable=True) - _metadata = Column('metadata', JSON) - - -class DbNode(ArchiveV1Base): - """Class to store nodes.""" - - __tablename__ = 'db_dbnode' - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - uuid = Column(CHAR(36), unique=True) - node_type = Column(String(255), index=True) - process_type = Column(String(255), index=True) - label = Column(String(255), index=True, nullable=True, default='') - description = Column(Text(), nullable=True, default='') - ctime = Column(DateTime(timezone=True)) - mtime = Column(DateTime(timezone=True)) - attributes = Column(JSON) - extras = Column(JSON) - repository_metadata = Column(JSON, nullable=False, default=dict, server_default='{}') - dbcomputer_id = Column( - Integer, - ForeignKey('db_dbcomputer.id', deferrable=True, initially='DEFERRED', ondelete='RESTRICT'), - nullable=True - ) - user_id = Column( - Integer, ForeignKey('db_dbuser.id', deferrable=True, initially='DEFERRED', ondelete='restrict'), nullable=False - ) - - -class DbLink(ArchiveV1Base): - """Class to store links between nodes.""" - - __tablename__ = 'db_dblink' - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - input_id = Column(Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'), index=True) - output_id = Column( - Integer, ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), index=True - ) - label = Column(String(255), index=True, nullable=False) - type = Column(String(255), index=True) - - -class DbUser(ArchiveV1Base): - """Class to store users.""" - - __tablename__ = 'db_dbuser' - - id = Column(Integer, primary_key=True) # pylint: disable=invalid-name - email = Column(String(254), unique=True, index=True) - first_name = Column(String(254), nullable=True) - last_name = Column(String(254), nullable=True) - institution = Column(String(254), nullable=True) diff --git a/aiida/tools/archive/implementations/sqlite/reader.py b/aiida/tools/archive/implementations/sqlite/reader.py deleted file mode 100644 index f3cdebbe74..0000000000 --- a/aiida/tools/archive/implementations/sqlite/reader.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""AiiDA archive reader implementation.""" -import json -from pathlib import Path -import tarfile -from typing import Any, Dict, Optional, Union -import zipfile - -from archive_path import read_file_in_tar, read_file_in_zip - -from aiida.manage import Profile -from aiida.tools.archive.abstract import ArchiveReaderAbstract -from aiida.tools.archive.exceptions import CorruptArchive, UnreadableArchiveError - -from . 
import backend as db -from .common import META_FILENAME - - -class ArchiveReaderSqlZip(ArchiveReaderAbstract): - """An archive reader for the SQLite format.""" - - def __init__(self, path: Union[str, Path], **kwargs: Any): - super().__init__(path, **kwargs) - self._in_context = False - # we lazily create the storage backend, then clean up on exit - self._backend: Optional[db.ArchiveReadOnlyBackend] = None - - def __enter__(self) -> 'ArchiveReaderSqlZip': - self._in_context = True - return self - - def __exit__(self, *args, **kwargs) -> None: - """Close the archive backend.""" - super().__exit__(*args, **kwargs) - if self._backend: - self._backend.close() - self._backend = None - self._in_context = False - - def get_metadata(self) -> Dict[str, Any]: - try: - return extract_metadata(self.path) - except Exception as exc: - raise CorruptArchive('metadata could not be read') from exc - - def get_backend(self) -> db.ArchiveReadOnlyBackend: - if not self._in_context: - raise AssertionError('Not in context') - if self._backend is not None: - return self._backend - profile = Profile( - 'default', { - 'storage': { - 'backend': 'archive.sqlite', - 'config': { - 'path': str(self.path) - } - }, - 'process_control': { - 'backend': 'null', - 'config': {} - } - } - ) - self._backend = db.ArchiveReadOnlyBackend(profile) - return self._backend - - -def extract_metadata(path: Union[str, Path], search_limit: Optional[int] = 10) -> Dict[str, Any]: - """Extract the metadata dictionary from the archive""" - # we fail if not one of the first record in central directory (as expected) - # so we don't have to iter all repo files to fail - return json.loads(read_file_in_zip(path, META_FILENAME, 'utf8', search_limit=search_limit)) - - -def read_version(path: Union[str, Path]) -> str: - """Read the version of the archive from the file. - - Intended to work for all versions of the archive format. 
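The reader module being removed here is superseded by the `ArchiveReaderSqlZip` in the new `sqlite_zip` implementation package, shown further below in this diff. As a usage sketch (the path is a placeholder, and requesting the backend assumes the archive is already at the latest schema version):

```python
# Sketch: the replacement reader added later in this diff, used as a context manager.
from aiida.tools.archive.implementations.sqlite_zip.reader import ArchiveReaderSqlZip

with ArchiveReaderSqlZip('export.aiida') as reader:  # placeholder path
    metadata = reader.get_metadata()
    print(metadata.get('export_version'))
    backend = reader.get_backend()  # a read-only SqliteZipBackend over the same file
```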
- - :param path: archive path - - :raises: ``FileNotFoundError`` if the file does not exist - :raises: ``UnreadableArchiveError`` if a version cannot be read from the archive - """ - path = Path(path) - if not path.is_file(): - raise FileNotFoundError('archive file not found') - # check the file is at least a zip or tar file - if zipfile.is_zipfile(path): - try: - metadata = extract_metadata(path, search_limit=None) - except Exception as exc: - raise UnreadableArchiveError(f'Could not read metadata for version: {exc}') from exc - elif tarfile.is_tarfile(path): - try: - metadata = json.loads(read_file_in_tar(path, META_FILENAME)) - except Exception as exc: - raise UnreadableArchiveError(f'Could not read metadata for version: {exc}') from exc - else: - raise UnreadableArchiveError('Not a zip or tar file') - if 'export_version' in metadata: - return metadata['export_version'] - raise UnreadableArchiveError("Metadata does not contain 'export_version' key") diff --git a/aiida/tools/archive/implementations/sqlite/__init__.py b/aiida/tools/archive/implementations/sqlite_zip/__init__.py similarity index 100% rename from aiida/tools/archive/implementations/sqlite/__init__.py rename to aiida/tools/archive/implementations/sqlite_zip/__init__.py diff --git a/aiida/tools/archive/implementations/sqlite/main.py b/aiida/tools/archive/implementations/sqlite_zip/main.py similarity index 88% rename from aiida/tools/archive/implementations/sqlite/main.py rename to aiida/tools/archive/implementations/sqlite_zip/main.py index 85b1242991..a86dc5dff1 100644 --- a/aiida/tools/archive/implementations/sqlite/main.py +++ b/aiida/tools/archive/implementations/sqlite_zip/main.py @@ -9,12 +9,13 @@ ########################################################################### """The file format implementation""" from pathlib import Path -from typing import Any, List, Literal, Union, overload +from typing import Any, Literal, Union, overload +from aiida.storage.sqlite_zip.migrator import get_schema_version_head, migrate +from aiida.storage.sqlite_zip.utils import read_version from aiida.tools.archive.abstract import ArchiveFormatAbstract -from .migrations.main import ALL_VERSIONS, migrate -from .reader import ArchiveReaderSqlZip, read_version +from .reader import ArchiveReaderSqlZip from .writer import ArchiveAppenderSqlZip, ArchiveWriterSqlZip __all__ = ('ArchiveFormatSqlZip',) @@ -36,8 +37,8 @@ class ArchiveFormatSqlZip(ArchiveFormatAbstract): """ @property - def versions(self) -> List[str]: - return ALL_VERSIONS + def latest_version(self) -> str: + return get_schema_version_head() def read_version(self, path: Union[str, Path]) -> str: return read_version(path) @@ -106,5 +107,4 @@ def migrate( :param path: archive path """ - current_version = self.read_version(inpath) - return migrate(inpath, outpath, current_version, version, force=force, compression=compression) + return migrate(inpath, outpath, version, force=force, compression=compression) diff --git a/aiida/tools/archive/implementations/sqlite_zip/reader.py b/aiida/tools/archive/implementations/sqlite_zip/reader.py new file mode 100644 index 0000000000..e5b73c18e4 --- /dev/null +++ b/aiida/tools/archive/implementations/sqlite_zip/reader.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""AiiDA archive reader implementation.""" +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from aiida.common.exceptions import CorruptStorage +from aiida.storage.sqlite_zip.backend import SqliteZipBackend +from aiida.storage.sqlite_zip.utils import extract_metadata +from aiida.tools.archive.abstract import ArchiveReaderAbstract + + +class ArchiveReaderSqlZip(ArchiveReaderAbstract): + """An archive reader for the SQLite format.""" + + def __init__(self, path: Union[str, Path], **kwargs: Any): + super().__init__(path, **kwargs) + self._in_context = False + # we lazily create the storage backend, then clean up on exit + self._backend: Optional[SqliteZipBackend] = None + + def __enter__(self) -> 'ArchiveReaderSqlZip': + self._in_context = True + return self + + def __exit__(self, *args, **kwargs) -> None: + """Close the archive backend.""" + super().__exit__(*args, **kwargs) + if self._backend: + self._backend.close() + self._backend = None + self._in_context = False + + def get_metadata(self) -> Dict[str, Any]: + try: + return extract_metadata(self.path) + except Exception as exc: + raise CorruptStorage('metadata could not be read') from exc + + def get_backend(self) -> SqliteZipBackend: + if not self._in_context: + raise AssertionError('Not in context') + if self._backend is not None: + return self._backend + profile = SqliteZipBackend.create_profile(self.path) + self._backend = SqliteZipBackend(profile) + return self._backend diff --git a/aiida/tools/archive/implementations/sqlite/writer.py b/aiida/tools/archive/implementations/sqlite_zip/writer.py similarity index 86% rename from aiida/tools/archive/implementations/sqlite/writer.py rename to aiida/tools/archive/implementations/sqlite_zip/writer.py index 4ee7358c84..2e4315b1da 100644 --- a/aiida/tools/archive/implementations/sqlite/writer.py +++ b/aiida/tools/archive/implementations/sqlite_zip/writer.py @@ -9,7 +9,6 @@ ########################################################################### """AiiDA archive writer implementation.""" from datetime import datetime -import functools import hashlib from io import BytesIO import json @@ -20,46 +19,24 @@ import zipfile from archive_path import NOTSET, ZipPath, extract_file_in_zip, read_file_in_zip -from sqlalchemy import insert, inspect +from sqlalchemy import insert from sqlalchemy.exc import IntegrityError as SqlaIntegrityError from sqlalchemy.future.engine import Connection from aiida import get_version -from aiida.common.exceptions import IntegrityError +from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, IntegrityError from aiida.common.hashing import chunked_file_hash from aiida.common.progress_reporter import get_progress_reporter from aiida.orm.entities import EntityTypes +from aiida.storage.sqlite_zip import models, utils from aiida.tools.archive.abstract import ArchiveFormatAbstract, ArchiveWriterAbstract -from aiida.tools.archive.exceptions import CorruptArchive, IncompatibleArchiveVersionError - -from . 
import backend as db -from .common import DB_FILENAME, META_FILENAME, REPO_FOLDER, create_sqla_engine - - -@functools.lru_cache(maxsize=10) -def _get_model_from_entity(entity_type: EntityTypes): - """Return the Sqlalchemy model and column names corresponding to the given entity.""" - model = { - EntityTypes.USER: db.DbUser, - EntityTypes.AUTHINFO: db.DbAuthInfo, - EntityTypes.GROUP: db.DbGroup, - EntityTypes.NODE: db.DbNode, - EntityTypes.COMMENT: db.DbComment, - EntityTypes.COMPUTER: db.DbComputer, - EntityTypes.LOG: db.DbLog, - EntityTypes.LINK: db.DbLink, - EntityTypes.GROUP_NODE: db.DbGroupNodes - }[entity_type] - mapper = inspect(model).mapper - column_names = {col.name for col in mapper.c.values()} - return model, column_names class ArchiveWriterSqlZip(ArchiveWriterAbstract): """AiiDA archive writer implementation.""" - meta_name = META_FILENAME - db_name = DB_FILENAME + meta_name = utils.META_FILENAME + db_name = utils.DB_FILENAME def __init__( self, @@ -106,10 +83,10 @@ def __enter__(self) -> 'ArchiveWriterSqlZip': info_order=(self.meta_name, self.db_name), name_to_info=self._central_dir, ) - engine = create_sqla_engine( + engine = utils.create_sqla_engine( self._work_dir / self.db_name, enforce_foreign_keys=self._enforce_foreign_keys, echo=self._debug ) - db.ArchiveDbBase.metadata.create_all(engine) + models.SqliteBase.metadata.create_all(engine) self._conn = engine.connect() self._in_context = True return self @@ -150,7 +127,7 @@ def bulk_insert( return self._assert_in_context() assert self._conn is not None - model, col_keys = _get_model_from_entity(entity_type) + model, col_keys = models.get_model_from_entity(entity_type) if allow_defaults: for row in rows: if not col_keys.issuperset(row): @@ -197,8 +174,8 @@ def put_object(self, stream: BinaryIO, *, buffer_size: Optional[int] = None, key if key is None: key = chunked_file_hash(stream, hashlib.sha256) stream.seek(0) - if f'{REPO_FOLDER}/{key}' not in self._central_dir: - self._stream_binary(f'{REPO_FOLDER}/{key}', stream, buffer_size=buffer_size) + if f'{utils.REPO_FOLDER}/{key}' not in self._central_dir: + self._stream_binary(f'{utils.REPO_FOLDER}/{key}', stream, buffer_size=buffer_size) return key def delete_object(self, key: str) -> None: @@ -210,9 +187,9 @@ class ArchiveAppenderSqlZip(ArchiveWriterSqlZip): def delete_object(self, key: str) -> None: self._assert_in_context() - if f'{REPO_FOLDER}/{key}' in self._central_dir: + if f'{utils.REPO_FOLDER}/{key}' in self._central_dir: raise IOError(f'Cannot delete object {key!r} that has been added in the same append context') - self._deleted_paths.add(f'{REPO_FOLDER}/{key}') + self._deleted_paths.add(f'{utils.REPO_FOLDER}/{key}') def __enter__(self) -> 'ArchiveAppenderSqlZip': """Start appending to the archive""" @@ -222,11 +199,11 @@ def __enter__(self) -> 'ArchiveAppenderSqlZip': # the file should be an archive with the correct version version = self._format.read_version(self._path) if not version == self._format.latest_version: - raise IncompatibleArchiveVersionError( + raise IncompatibleStorageSchema( f'Archive is version {version!r} but expected {self._format.latest_version!r}' ) # load the metadata - self._metadata = json.loads(read_file_in_zip(self._path, META_FILENAME, 'utf8', search_limit=4)) + self._metadata = json.loads(read_file_in_zip(self._path, utils.META_FILENAME, 'utf8', search_limit=4)) # overwrite metadata self._metadata['mtime'] = datetime.now().isoformat() self._metadata['compression'] = self._compression @@ -247,11 +224,11 @@ def __enter__(self) -> 
'ArchiveAppenderSqlZip': db_file = self._work_dir / self.db_name with db_file.open('wb') as handle: try: - extract_file_in_zip(self.path, DB_FILENAME, handle, search_limit=4) + extract_file_in_zip(self.path, utils.DB_FILENAME, handle, search_limit=4) except Exception as exc: - raise CorruptArchive(f'database could not be read: {exc}') from exc + raise CorruptStorage(f'archive database could not be read: {exc}') from exc # open a connection to the database - engine = create_sqla_engine( + engine = utils.create_sqla_engine( self._work_dir / self.db_name, enforce_foreign_keys=self._enforce_foreign_keys, echo=self._debug ) # to-do could check that the database has correct schema: diff --git a/aiida/tools/archive/imports.py b/aiida/tools/archive/imports.py index 7c0b002bf9..f5b0f332e6 100644 --- a/aiida/tools/archive/imports.py +++ b/aiida/tools/archive/imports.py @@ -16,6 +16,7 @@ from aiida import orm from aiida.common import timezone +from aiida.common.exceptions import IncompatibleStorageSchema from aiida.common.lang import type_check from aiida.common.links import LinkType from aiida.common.log import AIIDA_LOGGER @@ -28,8 +29,8 @@ from .abstract import ArchiveFormatAbstract from .common import batch_iter, entity_type_to_orm -from .exceptions import ImportTestRun, ImportUniquenessError, ImportValidationError, IncompatibleArchiveVersionError -from .implementations.sqlite import ArchiveFormatSqlZip +from .exceptions import ImportTestRun, ImportUniquenessError, ImportValidationError +from .implementations.sqlite_zip import ArchiveFormatSqlZip __all__ = ('IMPORT_LOGGER', 'import_archive') @@ -95,10 +96,9 @@ def import_archive( :returns: Primary Key of the import Group - :raises `~aiida.tools.archive.exceptions.IncompatibleArchiveVersionError`: if the provided archive's - version is not equal to the version of AiiDA at the moment of import. - :raises `~aiida.tools.archive.exceptions.ImportValidationError`: if parameters or the contents of - :raises `~aiida.tools.archive.exceptions.CorruptArchive`: if the provided archive cannot be read. + :raises `~aiida.common.exceptions.CorruptStorage`: if the provided archive cannot be read. + :raises `~aiida.common.exceptions.IncompatibleStorageSchema`: if the archive version is not at head. + :raises `~aiida.tools.archive.exceptions.ImportValidationError`: if invalid entities are found in the archive. :raises `~aiida.tools.archive.exceptions.ImportUniquenessError`: if a new unique entity can not be created. """ archive_format = archive_format or ArchiveFormatSqlZip() @@ -126,9 +126,9 @@ def import_archive( # i.e. 
its not whether the version is the latest that matters, it is that it is compatible with the backend version # its a bit weird at the moment because django/sqlalchemy have different versioning if not archive_format.read_version(path) == archive_format.latest_version: - raise IncompatibleArchiveVersionError( - f'The archive version {archive_format.read_version(path)} ' - f'is not the latest version {archive_format.latest_version}' + raise IncompatibleStorageSchema( + f'The archive version {archive_format.read_version(path)!r} ' + f'is not the latest version {archive_format.latest_version!r}' ) IMPORT_LOGGER.report( diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index bc87aa31aa..0464c9e354 100644 --- a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -52,6 +52,7 @@ py:class SelfType py:class Profile py:class PsqlDosBackend py:class str | list[str] +py:class str | Path ### AiiDA @@ -70,7 +71,6 @@ py:class aiida.tools.groups.paths.WalkNodeResult py:meth aiida.orm.groups.GroupCollection.delete py:class AbstractRepositoryBackend -py:class Backend py:class BackendEntity py:class BackendEntityType py:class BackendNode @@ -116,6 +116,7 @@ py:class ReturnType py:class Runner py:class Scheduler py:class SelfType +py:class StorageBackend py:class TransactionType py:class Transport py:class TransportQueue @@ -127,6 +128,7 @@ py:class orm.implementation.Backend py:class aiida.common.exceptions.UnreachableStorage py:class aiida.common.exceptions.IncompatibleDatabaseSchema py:class aiida.common.exceptions.DatabaseMigrationError +py:class aiida.storage.sqlite_zip.models.DbGroupNode py:class AuthInfoCollection py:class CommentCollection diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 7043a13b40..a01950b053 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -25,10 +25,11 @@ Below is a list with all available subcommands. --help Show this message and exit. Commands: - create Write subsets of the provenance graph to a single file. - import Import data from an AiiDA archive file. - inspect Inspect contents of an archive without importing it. - migrate Migrate an export archive to a more recent format version. + create Create an archive from all or part of a profiles's data. + import Import archived data to a profile. + info Summarise the contents of an archive. + migrate Migrate an archive to a more recent schema version. + version Print the current version of an archive's schema. .. 
_reference:command-line:verdi-calcjob: diff --git a/tests/cmdline/commands/test_archive_create.py b/tests/cmdline/commands/test_archive_create.py index 51a2229ce0..9f4ebf5cb9 100644 --- a/tests/cmdline/commands/test_archive_create.py +++ b/tests/cmdline/commands/test_archive_create.py @@ -7,7 +7,7 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Tests for `verdi export`.""" +"""Tests for `verdi archive`.""" import shutil import zipfile @@ -15,6 +15,7 @@ from aiida.cmdline.commands import cmd_archive from aiida.orm import Code, Computer, Dict, Group +from aiida.storage.sqlite_zip.migrator import list_versions from aiida.tools.archive import ArchiveFormatSqlZip from tests.utils.archives import get_archive_file @@ -85,10 +86,10 @@ def test_create_basic(run_cli_command, tmp_path): assert archive.querybuilder().append(Dict, project=['uuid']).all(flat=True) == [node.uuid] -@pytest.mark.parametrize('version', ArchiveFormatSqlZip().versions[:-1]) +@pytest.mark.parametrize('version', ('0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10', '0.11', '0.12')) def test_migrate_versions_old(run_cli_command, tmp_path, version): """Migrating archives with a version older than the current should work.""" - archive = f'export_v{version}_simple.aiida' + archive = f'export_{version}_simple.aiida' filename_input = get_archive_file(archive, filepath='export/migrate') filename_output = tmp_path / 'archive.aiida' @@ -100,7 +101,7 @@ def test_migrate_versions_old(run_cli_command, tmp_path, version): def test_migrate_version_specific(run_cli_command, tmp_path): """Test the `-v/--version` option to migrate to a specific version instead of the latest.""" - archive = 'export_v0.5_simple.aiida' + archive = 'export_0.5_simple.aiida' target_version = '0.8' filename_input = get_archive_file(archive, filepath='export/migrate') filename_output = tmp_path / 'archive.aiida' @@ -117,7 +118,7 @@ def test_migrate_file_already_exists(run_cli_command, tmp_path): """Test that using a file that already exists will raise.""" outpath = tmp_path / 'archive.aiida' outpath.touch() - filename_input = get_archive_file('export_v0.6_simple.aiida', filepath='export/migrate') + filename_input = get_archive_file('export_0.6_simple.aiida', filepath='export/migrate') options = [filename_input, outpath] run_cli_command(cmd_archive.migrate, options, raises=True) @@ -126,7 +127,7 @@ def test_migrate_force(run_cli_command, tmp_path): """Test that using a file that already exists will work when the ``-f/--force`` parameter is used.""" outpath = tmp_path / 'archive.aiida' outpath.touch() - filename_input = get_archive_file('export_v0.6_simple.aiida', filepath='export/migrate') + filename_input = get_archive_file('export_0.6_simple.aiida', filepath='export/migrate') options = ['--force', filename_input, outpath] run_cli_command(cmd_archive.migrate, options) assert ArchiveFormatSqlZip().read_version(outpath) == ArchiveFormatSqlZip().latest_version @@ -134,7 +135,7 @@ def test_migrate_force(run_cli_command, tmp_path): def test_migrate_in_place(run_cli_command, tmp_path): """Test that passing the -i/--in-place option will overwrite the passed file.""" - archive = 'export_v0.6_simple.aiida' + archive = 'export_0.6_simple.aiida' target_version = '0.8' filename_input = get_archive_file(archive, filepath='export/migrate') filename_clone = tmp_path / 'archive.aiida' @@ -166,7 +167,7 @@ def 
test_migrate_low_verbosity(run_cli_command, tmp_path): Note that we use the ``config_with_profile`` fixture to create a dummy profile, since the ``--verbosity`` option will change the profile configuration which could potentially influence the other tests. """ - filename_input = get_archive_file('export_v0.6_simple.aiida', filepath='export/migrate') + filename_input = get_archive_file('export_0.6_simple.aiida', filepath='export/migrate') filename_output = tmp_path / 'archive.aiida' options = ['--verbosity', 'WARNING', filename_input, filename_output] @@ -177,36 +178,36 @@ def test_migrate_low_verbosity(run_cli_command, tmp_path): assert ArchiveFormatSqlZip().read_version(filename_output) == ArchiveFormatSqlZip().latest_version -@pytest.mark.parametrize('version', ArchiveFormatSqlZip().versions) -def test_inspect_version(run_cli_command, version): - """Test the functionality of `verdi export inspect --version`.""" - archive = f'export_v{version}_simple.aiida' +@pytest.mark.parametrize('version', [v for v in list_versions() if v not in ('main_0000a', 'main_0000b')]) +def test_version(run_cli_command, version): + """Test the functionality of `verdi archive version`.""" + archive = f'export_{version}_simple.aiida' filename_input = get_archive_file(archive, filepath='export/migrate') - options = ['--version', filename_input] - result = run_cli_command(cmd_archive.inspect, options) - assert result.output.strip() == f'{version}' + options = [filename_input] + result = run_cli_command(cmd_archive.archive_version, options) + assert version in result.output -def test_inspect_metadata(run_cli_command): - """Test the functionality of `verdi export inspect --meta-data`.""" - archive = f'export_v{ArchiveFormatSqlZip().latest_version}_simple.aiida' +def test_info(run_cli_command): + """Test the functionality of `verdi archive info`.""" + archive = f'export_{ArchiveFormatSqlZip().latest_version}_simple.aiida' filename_input = get_archive_file(archive, filepath='export/migrate') - options = ['--meta-data', filename_input] - result = run_cli_command(cmd_archive.inspect, options) + options = [filename_input] + result = run_cli_command(cmd_archive.archive_info, options) assert 'export_version' in result.output -def test_inspect_database(run_cli_command): - """Test the functionality of `verdi export inspect --meta-data`.""" - archive = f'export_v{ArchiveFormatSqlZip().latest_version}_simple.aiida' +def test_info_detailed(run_cli_command): + """Test the functionality of `verdi archive info --statistics`.""" + archive = f'export_{ArchiveFormatSqlZip().latest_version}_simple.aiida' filename_input = get_archive_file(archive, filepath='export/migrate') - options = ['--database', filename_input] - result = run_cli_command(cmd_archive.inspect, options) + options = ['--statistics', filename_input] + result = run_cli_command(cmd_archive.archive_info, options) assert 'Nodes:' in result.output -def test_inspect_empty_archive(run_cli_command): - """Test the functionality of `verdi export inspect` for an empty archive.""" +def test_info_empty_archive(run_cli_command): + """Test the functionality of `verdi archive info` for an empty archive.""" filename_input = get_archive_file('empty.aiida', filepath='export/migrate') - result = run_cli_command(cmd_archive.inspect, [filename_input], raises=True) - assert 'archive file of unknown format' in result.output + result = run_cli_command(cmd_archive.archive_info, [filename_input], raises=True) + assert 'archive file unreadable' in result.output diff --git 
a/tests/cmdline/commands/test_archive_import.py b/tests/cmdline/commands/test_archive_import.py index 7ea347e882..ddad778313 100644 --- a/tests/cmdline/commands/test_archive_import.py +++ b/tests/cmdline/commands/test_archive_import.py @@ -14,9 +14,12 @@ from aiida.cmdline.commands import cmd_archive from aiida.orm import Group +from aiida.storage.sqlite_zip.migrator import list_versions from aiida.tools.archive import ArchiveFormatSqlZip from tests.utils.archives import get_archive_file +ARCHIVE_PATH = 'export/migrate' + class TestVerdiImport: """Tests for `verdi import`.""" @@ -29,8 +32,7 @@ def init_cls(self, aiida_profile_clean): # pylint: disable=unused-argument # Helper variables self.url_path = 'https://raw.githubusercontent.com/aiidateam/aiida-core/' \ '0599dabf0887bee172a04f308307e99e3c3f3ff2/aiida/backends/tests/fixtures/export/migrate/' - self.archive_path = 'export/migrate' - self.newest_archive = f'export_v{ArchiveFormatSqlZip().latest_version}_simple.aiida' + self.newest_archive = f'export_{ArchiveFormatSqlZip().latest_version}_simple.aiida' def test_import_no_archives(self): """Test that passing no valid archives will lead to command failure.""" @@ -55,7 +57,7 @@ def test_import_archive(self): """ archives = [ get_archive_file('arithmetic.add.aiida', filepath='calcjob'), - get_archive_file(self.newest_archive, filepath=self.archive_path) + get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH) ] options = [] + archives @@ -71,7 +73,7 @@ def test_import_to_group(self): """ archives = [ get_archive_file('arithmetic.add.aiida', filepath='calcjob'), - get_archive_file(self.newest_archive, filepath=self.archive_path) + get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH) ] group_label = 'import_madness' @@ -115,7 +117,7 @@ def test_import_make_new_group(self): """Make sure imported entities are saved in new Group""" # Initialization group_label = 'new_group_for_verdi_import' - archives = [get_archive_file(self.newest_archive, filepath=self.archive_path)] + archives = [get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)] # Check Group does not already exist group_search = Group.objects.find(filters={'label': group_label}) @@ -134,7 +136,7 @@ def test_import_make_new_group(self): def test_no_import_group(self): """Test '--import-group/--no-import-group' options.""" - archives = [get_archive_file(self.newest_archive, filepath=self.archive_path)] + archives = [get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)] assert Group.objects.count() == 0, 'There should be no Groups.' @@ -165,27 +167,13 @@ def test_no_import_group(self): @pytest.mark.skip('Due to summary being logged, this can not be checked against `results.output`.') # pylint: disable=not-callable def test_comment_mode(self): """Test toggling comment mode flag""" - archives = [get_archive_file(self.newest_archive, filepath=self.archive_path)] + archives = [get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)] for mode in ['leave', 'newest', 'overwrite']: options = ['--comment-mode', mode] + archives result = self.cli_runner.invoke(cmd_archive.import_archive, options) assert result.exception is None, result.output assert result.exit_code == 0, result.output - def test_import_old_local_archives(self): - """ Test import of old local archives - Expected behavior: Automatically migrate to newest version and import correctly. 
- """ - for version in ArchiveFormatSqlZip().versions: - archive, version = (f'export_v{version}_simple.aiida', f'{version}') - options = [get_archive_file(archive, filepath=self.archive_path)] - result = self.cli_runner.invoke(cmd_archive.import_archive, options) - - assert result.exception is None, result.output - assert result.exit_code == 0, result.output - assert version in result.output, result.exception - assert f'Success: imported archive {options[0]}' in result.output, result.exception - def test_import_old_url_archives(self): """ Test import of old URL archives Expected behavior: Automatically migrate to newest version and import correctly. @@ -207,8 +195,8 @@ def test_import_url_and_local_archives(self): local_archive = self.newest_archive options = [ - get_archive_file(local_archive, filepath=self.archive_path), self.url_path + url_archive, - get_archive_file(local_archive, filepath=self.archive_path) + get_archive_file(local_archive, filepath=ARCHIVE_PATH), self.url_path + url_archive, + get_archive_file(local_archive, filepath=ARCHIVE_PATH) ] result = self.cli_runner.invoke(cmd_archive.import_archive, options) @@ -243,7 +231,7 @@ def test_migration(self): `migration` = True (default), Expected: No query, migrate `migration` = False, Expected: No query, no migrate """ - archive = get_archive_file('export_v0.4_simple.aiida', filepath=self.archive_path) + archive = get_archive_file('export_0.4_simple.aiida', filepath=ARCHIVE_PATH) success_message = f'Success: imported archive {archive}' # Import "normally", but explicitly specifying `--migration`, make sure confirm message is present @@ -267,3 +255,17 @@ def test_migration(self): assert 'trying migration' not in result.output, result.exception assert success_message not in result.output, result.exception + + +@pytest.mark.usefixtures('aiida_profile_clean') +@pytest.mark.parametrize('version', [v for v in list_versions() if v not in ('main_0000a', 'main_0000b')]) +def test_import_old_local_archives(version, run_cli_command): + """ Test import of old local archives + Expected behavior: Automatically migrate to newest version and import correctly. 
+ """ + archive, version = (f'export_{version}_simple.aiida', f'{version}') + options = [get_archive_file(archive, filepath=ARCHIVE_PATH)] + result = run_cli_command(cmd_archive.import_archive, options) + + assert version in result.output, result.exception + assert f'Success: imported archive {options[0]}' in result.output, result.exception diff --git a/tests/fixtures/export/migrate/export_v0.10_simple.aiida b/tests/fixtures/export/migrate/export_v0.10_simple.aiida deleted file mode 100644 index dbeea937c1..0000000000 Binary files a/tests/fixtures/export/migrate/export_v0.10_simple.aiida and /dev/null differ diff --git a/tests/static/calcjob/arithmetic.add.aiida b/tests/static/calcjob/arithmetic.add.aiida index 9166ab33c4..5fdfab9548 100644 Binary files a/tests/static/calcjob/arithmetic.add.aiida and b/tests/static/calcjob/arithmetic.add.aiida differ diff --git a/tests/static/calcjob/arithmetic.add_old.aiida b/tests/static/calcjob/arithmetic.add_old.aiida index 7c3c1f985a..b5e5b01959 100644 Binary files a/tests/static/calcjob/arithmetic.add_old.aiida and b/tests/static/calcjob/arithmetic.add_old.aiida differ diff --git a/tests/static/calcjob/container/config.json b/tests/static/calcjob/container/config.json deleted file mode 100644 index 453ca5cce8..0000000000 --- a/tests/static/calcjob/container/config.json +++ /dev/null @@ -1 +0,0 @@ -{"container_version": 1, "loose_prefix_len": 2, "pack_size_target": 4294967296, "hash_type": "sha256", "container_id": "036ea0341e05499d9abde1cde49ee4ce", "compression_algorithm": "zlib+1"} \ No newline at end of file diff --git a/tests/static/calcjob/container/loose/04/bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b b/tests/static/calcjob/container/loose/04/bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b deleted file mode 100644 index ee6f127f39..0000000000 Binary files a/tests/static/calcjob/container/loose/04/bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b and /dev/null differ diff --git a/tests/static/calcjob/container/loose/33/7b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81 b/tests/static/calcjob/container/loose/33/7b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81 deleted file mode 100644 index 654d526942..0000000000 --- a/tests/static/calcjob/container/loose/33/7b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81 +++ /dev/null @@ -1 +0,0 @@ -2 3 diff --git a/tests/static/calcjob/container/loose/59/ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372 b/tests/static/calcjob/container/loose/59/ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372 deleted file mode 100644 index 6a5c41cdf4..0000000000 --- a/tests/static/calcjob/container/loose/59/ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372 +++ /dev/null @@ -1 +0,0 @@ -{"uuid": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "codes_info": [{"cmdline_params": ["/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh", "-in", "aiida.in"], "stdout_name": "aiida.out", "code_uuid": "8052fd27-f3ee-46cb-b23e-4ce5e446483e"}], "retrieve_list": ["aiida.out", "_scheduler-stdout.txt", "_scheduler-stderr.txt"], "local_copy_list": [], "remote_copy_list": []} \ No newline at end of file diff --git a/tests/static/calcjob/container/loose/95/f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532 b/tests/static/calcjob/container/loose/95/f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532 deleted file mode 100644 index cc936f6430..0000000000 --- 
a/tests/static/calcjob/container/loose/95/f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532 +++ /dev/null @@ -1 +0,0 @@ -{"shebang": "#!/bin/bash", "submit_as_hold": false, "rerunnable": false, "job_environment": {}, "job_name": "aiida-30", "sched_output_path": "_scheduler-stdout.txt", "sched_error_path": "_scheduler-stderr.txt", "sched_join_files": false, "prepend_text": "", "append_text": "", "job_resource": {"num_cores_per_machine": null, "num_cores_per_mpiproc": null, "num_machines": 1, "num_mpiprocs_per_machine": 1}, "codes_info": [{"cmdline_params": ["/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh", "-in", "aiida.in"], "stdout_name": "aiida.out", "code_uuid": "8052fd27-f3ee-46cb-b23e-4ce5e446483e"}], "codes_run_mode": 0, "import_sys_environment": true} \ No newline at end of file diff --git a/tests/static/calcjob/container/loose/d3/e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e b/tests/static/calcjob/container/loose/d3/e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e deleted file mode 100644 index 2012bdf8a1..0000000000 --- a/tests/static/calcjob/container/loose/d3/e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -exec > _scheduler-stdout.txt -exec 2> _scheduler-stderr.txt - - -'/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh' '-in' 'aiida.in' > 'aiida.out' diff --git a/tests/static/calcjob/container/loose/e1/49222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879 b/tests/static/calcjob/container/loose/e1/49222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879 deleted file mode 100644 index 524686cd87..0000000000 Binary files a/tests/static/calcjob/container/loose/e1/49222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879 and /dev/null differ diff --git a/tests/static/calcjob/container/loose/e3/b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 b/tests/static/calcjob/container/loose/e3/b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/static/calcjob/container/loose/f0/b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06 b/tests/static/calcjob/container/loose/f0/b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06 deleted file mode 100644 index 7ed6ff82de..0000000000 --- a/tests/static/calcjob/container/loose/f0/b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06 +++ /dev/null @@ -1 +0,0 @@ -5 diff --git a/tests/static/calcjob/container/packs.idx b/tests/static/calcjob/container/packs.idx deleted file mode 100644 index e47a083397..0000000000 Binary files a/tests/static/calcjob/container/packs.idx and /dev/null differ diff --git a/tests/static/calcjob/container/packs/0 b/tests/static/calcjob/container/packs/0 deleted file mode 100644 index 5bb5ef7be5..0000000000 Binary files a/tests/static/calcjob/container/packs/0 and /dev/null differ diff --git a/tests/static/calcjob/data.json b/tests/static/calcjob/data.json deleted file mode 100644 index 0212ee0df8..0000000000 --- a/tests/static/calcjob/data.json +++ /dev/null @@ -1 +0,0 @@ -{"links_uuid": [{"input": "8052fd27-f3ee-46cb-b23e-4ce5e446483e", "label": "code", "type": "input_calc", "output": "9d3fda4f-6782-4441-a276-b8965aa3f97f"}, {"input": "4ab65a6b-d784-44d7-9739-498a1dc1d062", "label": "x", "type": "input_calc", "output": "9d3fda4f-6782-4441-a276-b8965aa3f97f"}, {"input": "becfc71f-fa63-433f-9919-1493f2e1cc76", "label": "y", "type": "input_calc", 
"output": "9d3fda4f-6782-4441-a276-b8965aa3f97f"}, {"input": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "label": "sum", "type": "create", "output": "6ab73adc-625c-4158-aad4-4d67ea9b57b8"}, {"input": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "label": "retrieved", "type": "create", "output": "2f8fe6c7-1312-4908-9fa0-0dac95c1823d"}, {"input": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "label": "remote_folder", "type": "create", "output": "2b216c5e-951b-43d5-b66f-5469efac5374"}], "export_data": {"Node": {"24": {"ctime": "2019-04-09T14:07:07.435307", "description": "Add number together", "dbcomputer": 1, "process_type": null, "label": "arithmetic.add", "node_type": "data.core.code.Code.", "user": 2, "mtime": "2019-04-09T14:07:07.685356", "uuid": "8052fd27-f3ee-46cb-b23e-4ce5e446483e"}, "25": {"ctime": "2019-04-09T14:09:15.953832", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.int.Int.", "user": 2, "mtime": "2019-04-09T14:09:15.957547", "uuid": "becfc71f-fa63-433f-9919-1493f2e1cc76"}, "26": {"ctime": "2019-04-09T14:09:22.401149", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.int.Int.", "user": 2, "mtime": "2019-04-09T14:09:22.402785", "uuid": "4ab65a6b-d784-44d7-9739-498a1dc1d062"}, "33": {"ctime": "2019-04-09T14:31:58.884999", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.int.Int.", "user": 2, "mtime": "2019-04-09T14:31:58.935272", "uuid": "6ab73adc-625c-4158-aad4-4d67ea9b57b8"}, "32": {"ctime": "2019-04-09T14:31:58.700914", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.folder.FolderData.", "user": 2, "mtime": "2019-04-09T14:31:58.750472", "uuid": "2f8fe6c7-1312-4908-9fa0-0dac95c1823d", "repository_metadata": {"o": {"gzipped_data": {"k": "e149222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879"}, "_scheduler-stdout.txt": {"k": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, "_scheduler-stderr.txt": {"k": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, "aiida.out": {"k": "f0b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06"}}}}, "31": {"ctime": "2019-04-09T14:31:51.914077", "description": "", "dbcomputer": 1, "process_type": null, "label": "", "node_type": "data.core.remote.RemoteData.", "user": 2, "mtime": "2019-04-09T14:31:51.944102", "uuid": "2b216c5e-951b-43d5-b66f-5469efac5374"}, "30": {"ctime": "2019-04-09T14:31:49.024479", "description": "", "dbcomputer": 1, "process_type": "aiida.calculations:arithmetic.add", "label": "", "node_type": "process.calculation.calcjob.CalcJobNode.", "user": 2, "mtime": "2019-04-09T14:31:58.993336", "uuid": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "repository_metadata": {"o": {".aiida": {"o": {"job_tmpl.json": {"k": "95f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532"}, "calcinfo.json": {"k": "59ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372"}}}, "aiida.in": {"k": "337b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81"}, "in_gzipped_data": {"k": "04bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b"}, "_aiidasubmit.sh": {"k": "d3e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e"}}}}}, "Computer": {"1": {"uuid": "b20faf4c-1475-42a4-bac7-cf9ff0eb6799", "hostname": "localhost", "transport_type": "local", "metadata": {"workdir": "/scratch/candersen/aiida/", "shebang": "#!/bin/bash", "prepend_text": "", "mpirun_command": [], 
"default_mpiprocs_per_machine": 1, "append_text": ""}, "scheduler_type": "direct", "description": "localhost", "label": "localhost"}}, "User": {"2": {"first_name": "Casper", "last_name": "Andersen", "institution": "EPFL", "email": "casper.andersen@epfl.ch"}}}, "node_attributes": {"24": {"is_local": false, "append_text": "", "remote_exec_path": "/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh", "prepend_text": "", "input_plugin": "arithmetic.add"}, "25": {"value": 3}, "26": {"value": 2}, "33": {"value": 5}, "32": {}, "31": {"remote_path": "/scratch/candersen/aiida/9d/3f/da4f-6782-4441-a276-b8965aa3f97f"}, "30": {"import_sys_environment": true, "process_state": "finished", "exit_status": 0, "output_filename": "aiida.out", "scheduler_state": "done", "custom_scheduler_commands": "", "input_filename": "aiida.in", "job_id": "27752", "prepend_text": "", "remote_workdir": "/scratch/candersen/aiida/9d/3f/da4f-6782-4441-a276-b8965aa3f97f", "append_text": "", "sealed": true, "retrieve_singlefile_list": [], "resources": {"num_machines": 1, "num_mpiprocs_per_machine": 1, "default_mpiprocs_per_machine": 1}, "retrieve_list": ["aiida.out", "_scheduler-stdout.txt", "_scheduler-stderr.txt"], "retrieve_temporary_list": [], "scheduler_lastchecktime": "2019-04-09T14:31:56.534041+00:00", "mpirun_extra_params": [], "scheduler_stderr": "_scheduler-stderr.txt", "parser_name": "arithmetic.add", "withmpi": false, "environment_variables": {}, "process_label": "ArithmeticAddCalculation", "scheduler_stdout": "_scheduler-stdout.txt"}}, "node_extras": {"24": {"_aiida_hash": "669f3bac68d2edba2f8772d536d7ced8d3a481d9dc96beaa70cbdb51f85a7713", "hidden": false}, "25": {"_aiida_hash": "8f1442527a4ce25bf6c8234f04301d3e3da9c0cf8d77cbffa973fd9b59a89b7b"}, "26": {"_aiida_hash": "10540cd8422b78634ef7853859b1b9494793b6cd37bf8c2fe6c63ee229d986fe"}, "33": {"_aiida_hash": "3d5cf2c3e2b56c24b5410b1f7666974fc1795f2c425390f4e9b00e47a8707e29"}, "32": {"_aiida_hash": "83d8e6d169a5c7625c371ea8695f0d38b568a38c3a43ce220c5407c947afc026"}, "31": {"_aiida_hash": "13bfaf327a85cc6d8436754fe702a6ffb2dcdbac611b3241c4e0082424b61443"}, "30": {"_aiida_hash": "dbf0f929ed4f14bcd542306563c03338d1e6ddd704341938dcf3beca4171fd5d"}}, "groups_uuid": {}} \ No newline at end of file diff --git a/tests/static/calcjob/metadata.json b/tests/static/calcjob/metadata.json deleted file mode 100644 index 605c496389..0000000000 --- a/tests/static/calcjob/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"conversion_info": ["Converted from version 0.4 to 0.5 with AiiDA v1.0.0b3", "Converted from version 0.5 to 0.6 with AiiDA v1.0.0b4", "Converted from version 0.6 to 0.7 with AiiDA v1.0.0b4", "Converted from version 0.7 to 0.8 with AiiDA v1.0.0", "Converted from version 0.8 to 0.9 with AiiDA v1.1.1", "Converted from version 0.9 to 0.10 with AiiDA v1.4.2", "Converted from version 0.10 to 0.11 with AiiDA v1.5.2", "Converted from version 0.11 to 0.12 with AiiDA v1.6.2", "Converted from version 0.12 to 0.13 with AiiDA v2.0.0a1"], "export_version": "0.13", "aiida_version": "2.0.0a1", "unique_identifiers": {"Node": "uuid", "Comment": "uuid", "Group": "uuid", "Log": "uuid", "Computer": "uuid", "User": "email"}, "all_fields_info": {"Node": {"ctime": {"convert_type": "date"}, "description": {}, "dbcomputer": {"related_name": "dbnodes", "requires": "Computer"}, "process_type": {}, "label": {}, "node_type": {}, "user": {"related_name": "dbnodes", "requires": "User"}, "mtime": {"convert_type": "date"}, "uuid": {}, "attributes": {"convert_type": "jsonb"}, "extras": 
{"convert_type": "jsonb"}, "repository_metadata": {}}, "Comment": {"dbnode": {"related_name": "dbcomments", "requires": "Node"}, "uuid": {}, "content": {}, "user": {"related_name": "dbcomments", "requires": "User"}, "mtime": {"convert_type": "date"}, "ctime": {"convert_type": "date"}}, "Group": {"type_string": {}, "uuid": {}, "label": {}, "user": {"related_name": "dbgroups", "requires": "User"}, "time": {"convert_type": "date"}, "description": {}, "extras": {"convert_type": "jsonb"}}, "Log": {"dbnode": {"related_name": "dblogs", "requires": "Node"}, "uuid": {}, "loggername": {}, "time": {"convert_type": "date"}, "message": {}, "levelname": {}, "metadata": {}}, "Computer": {"uuid": {}, "hostname": {}, "transport_type": {}, "metadata": {}, "scheduler_type": {}, "description": {}, "label": {}}, "User": {"first_name": {}, "last_name": {}, "institution": {}, "email": {}}}} \ No newline at end of file diff --git a/tests/static/export/compare/django.aiida b/tests/static/export/compare/django.aiida index 254e73e62d..08a035840e 100644 Binary files a/tests/static/export/compare/django.aiida and b/tests/static/export/compare/django.aiida differ diff --git a/tests/static/export/compare/sqlalchemy.aiida b/tests/static/export/compare/sqlalchemy.aiida index bd78b641e2..810d73ee4d 100644 Binary files a/tests/static/export/compare/sqlalchemy.aiida and b/tests/static/export/compare/sqlalchemy.aiida differ diff --git a/tests/static/export/migrate/0.10_dangling_link.aiida b/tests/static/export/migrate/0.10_dangling_link.aiida new file mode 100644 index 0000000000..7bb644795f Binary files /dev/null and b/tests/static/export/migrate/0.10_dangling_link.aiida differ diff --git a/tests/static/export/migrate/0.10_null_fields.aiida b/tests/static/export/migrate/0.10_null_fields.aiida new file mode 100644 index 0000000000..59d1c07dbc Binary files /dev/null and b/tests/static/export/migrate/0.10_null_fields.aiida differ diff --git a/tests/static/export/migrate/0.10_unknown_nodes_in_group.aiida b/tests/static/export/migrate/0.10_unknown_nodes_in_group.aiida new file mode 100644 index 0000000000..460d73e276 Binary files /dev/null and b/tests/static/export/migrate/0.10_unknown_nodes_in_group.aiida differ diff --git a/tests/static/export/migrate/export_v0.10_simple.aiida b/tests/static/export/migrate/export_0.10_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.10_simple.aiida rename to tests/static/export/migrate/export_0.10_simple.aiida diff --git a/tests/static/export/migrate/export_v0.11_simple.aiida b/tests/static/export/migrate/export_0.11_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.11_simple.aiida rename to tests/static/export/migrate/export_0.11_simple.aiida diff --git a/tests/static/export/migrate/export_v0.12_simple.aiida b/tests/static/export/migrate/export_0.12_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.12_simple.aiida rename to tests/static/export/migrate/export_0.12_simple.aiida diff --git a/tests/static/export/migrate/export_v0.1_simple.aiida b/tests/static/export/migrate/export_0.1_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.1_simple.aiida rename to tests/static/export/migrate/export_0.1_simple.aiida diff --git a/tests/static/export/migrate/export_v0.2_simple.aiida b/tests/static/export/migrate/export_0.2_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.2_simple.aiida rename to 
tests/static/export/migrate/export_0.2_simple.aiida diff --git a/tests/static/export/migrate/export_v0.2_simple.tar.gz b/tests/static/export/migrate/export_0.2_simple.tar.gz similarity index 100% rename from tests/static/export/migrate/export_v0.2_simple.tar.gz rename to tests/static/export/migrate/export_0.2_simple.tar.gz diff --git a/tests/static/export/migrate/export_v0.3_simple.aiida b/tests/static/export/migrate/export_0.3_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.3_simple.aiida rename to tests/static/export/migrate/export_0.3_simple.aiida diff --git a/tests/static/export/migrate/export_v0.4_no_Nodes.aiida b/tests/static/export/migrate/export_0.4_no_Nodes.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.4_no_Nodes.aiida rename to tests/static/export/migrate/export_0.4_no_Nodes.aiida diff --git a/tests/static/export/migrate/export_v0.4_simple.aiida b/tests/static/export/migrate/export_0.4_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.4_simple.aiida rename to tests/static/export/migrate/export_0.4_simple.aiida diff --git a/tests/static/export/migrate/export_v0.4_simple.tar.gz b/tests/static/export/migrate/export_0.4_simple.tar.gz similarity index 100% rename from tests/static/export/migrate/export_v0.4_simple.tar.gz rename to tests/static/export/migrate/export_0.4_simple.tar.gz diff --git a/tests/static/export/migrate/export_v0.5_simple.aiida b/tests/static/export/migrate/export_0.5_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.5_simple.aiida rename to tests/static/export/migrate/export_0.5_simple.aiida diff --git a/tests/static/export/migrate/export_v0.6_simple.aiida b/tests/static/export/migrate/export_0.6_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.6_simple.aiida rename to tests/static/export/migrate/export_0.6_simple.aiida diff --git a/tests/static/export/migrate/export_v0.7_simple.aiida b/tests/static/export/migrate/export_0.7_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.7_simple.aiida rename to tests/static/export/migrate/export_0.7_simple.aiida diff --git a/tests/static/export/migrate/export_v0.8_simple.aiida b/tests/static/export/migrate/export_0.8_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.8_simple.aiida rename to tests/static/export/migrate/export_0.8_simple.aiida diff --git a/tests/static/export/migrate/export_v0.9_simple.aiida b/tests/static/export/migrate/export_0.9_simple.aiida similarity index 100% rename from tests/static/export/migrate/export_v0.9_simple.aiida rename to tests/static/export/migrate/export_0.9_simple.aiida diff --git a/tests/static/export/migrate/export_main_0000_simple.aiida b/tests/static/export/migrate/export_main_0000_simple.aiida new file mode 100644 index 0000000000..b24b062b11 Binary files /dev/null and b/tests/static/export/migrate/export_main_0000_simple.aiida differ diff --git a/tests/static/export/migrate/export_v1.0_simple.aiida b/tests/static/export/migrate/export_main_0001_simple.aiida similarity index 81% rename from tests/static/export/migrate/export_v1.0_simple.aiida rename to tests/static/export/migrate/export_main_0001_simple.aiida index af95d66902..2c41a71f5a 100644 Binary files a/tests/static/export/migrate/export_v1.0_simple.aiida and b/tests/static/export/migrate/export_main_0001_simple.aiida differ diff --git a/tests/tools/archive/migration/conftest.py 
b/tests/tools/archive/migration/conftest.py index a6b9bccc4d..ef4ee36d0a 100644 --- a/tests/tools/archive/migration/conftest.py +++ b/tests/tools/archive/migration/conftest.py @@ -10,7 +10,7 @@ """Module with tests for export archive migrations.""" import pytest -from aiida.tools.archive.implementations.sqlite.migrations.utils import verify_metadata_version +from aiida.storage.sqlite_zip.migrations.utils import verify_metadata_version from tests.utils.archives import get_archive_file, read_json_files diff --git a/tests/tools/archive/migration/test_legacy_funcs.py b/tests/tools/archive/migration/test_legacy_funcs.py index 79aba89ab0..ba576f9c76 100644 --- a/tests/tools/archive/migration/test_legacy_funcs.py +++ b/tests/tools/archive/migration/test_legacy_funcs.py @@ -12,8 +12,8 @@ import pytest from aiida import get_version -from aiida.tools.archive.implementations.sqlite.migrations.legacy import LEGACY_MIGRATE_FUNCTIONS -from aiida.tools.archive.implementations.sqlite.migrations.utils import verify_metadata_version +from aiida.storage.sqlite_zip.migrations.legacy import LEGACY_MIGRATE_FUNCTIONS +from aiida.storage.sqlite_zip.migrations.utils import verify_metadata_version from tests.utils.archives import get_archive_file, read_json_files @@ -26,13 +26,13 @@ def test_migrations(migration_data): """Test each migration method from the `aiida.tools.archive.archive.migrations` module.""" version_old, (version_new, migration_method) = migration_data - filepath_archive_new = get_archive_file(f'export_v{version_new}_simple.aiida', filepath='export/migrate') + filepath_archive_new = get_archive_file(f'export_{version_new}_simple.aiida', filepath='export/migrate') metadata_new = read_json_files(filepath_archive_new, names=['metadata.json'])[0] verify_metadata_version(metadata_new, version=version_new) data_new = read_json_files(filepath_archive_new, names=['data.json'])[0] - filepath_archive_old = get_archive_file(f'export_v{version_old}_simple.aiida', filepath='export/migrate') + filepath_archive_old = get_archive_file(f'export_{version_old}_simple.aiida', filepath='export/migrate') metadata_old, data_old = read_json_files(filepath_archive_old, names=['metadata.json', 'data.json']) # pylint: disable=unbalanced-tuple-unpacking diff --git a/tests/tools/archive/migration/test_migration.py b/tests/tools/archive/migration/test_legacy_migrations.py similarity index 92% rename from tests/tools/archive/migration/test_migration.py rename to tests/tools/archive/migration/test_legacy_migrations.py index c998e1504f..9479117920 100644 --- a/tests/tools/archive/migration/test_migration.py +++ b/tests/tools/archive/migration/test_legacy_migrations.py @@ -12,7 +12,8 @@ import pytest from aiida import orm -from aiida.tools.archive import ArchiveFormatSqlZip, ArchiveMigrationError +from aiida.common.exceptions import StorageMigrationError +from aiida.tools.archive import ArchiveFormatSqlZip from tests.utils.archives import get_archive_file # archives to test migration against @@ -47,7 +48,7 @@ } -@pytest.mark.parametrize('archive_name', ('export_v0.4_simple.aiida', 'export_v0.4_simple.tar.gz')) +@pytest.mark.parametrize('archive_name', ('export_0.4_simple.aiida', 'export_0.4_simple.tar.gz')) def test_full_migration(tmp_path, core_archive, archive_name): """Test a migration from the first to newest archive version.""" @@ -77,17 +78,17 @@ def test_full_migration(tmp_path, core_archive, archive_name): def test_partial_migrations(core_archive, tmp_path): """Test migrations from a specific version (0.5) to other 
versions.""" - filepath_archive = get_archive_file('export_v0.5_simple.aiida', **core_archive) + filepath_archive = get_archive_file('export_0.5_simple.aiida', **core_archive) archive_format = ArchiveFormatSqlZip() assert archive_format.read_version(filepath_archive) == '0.5' new_archive = tmp_path / 'out.aiida' - with pytest.raises(ArchiveMigrationError, match='Unknown target version'): + with pytest.raises(StorageMigrationError, match='Unknown target version'): archive_format.migrate(filepath_archive, new_archive, 0.2) - with pytest.raises(ArchiveMigrationError, match='No migration pathway available'): + with pytest.raises(StorageMigrationError, match='No migration pathway available'): archive_format.migrate(filepath_archive, new_archive, '0.4') archive_format.migrate(filepath_archive, new_archive, '0.7') @@ -96,7 +97,7 @@ def test_partial_migrations(core_archive, tmp_path): def test_no_node_migration(tmp_path, core_archive): """Test migration of archive file that has no Node entities.""" - filepath_archive = get_archive_file('export_v0.4_no_Nodes.aiida', **core_archive) + filepath_archive = get_archive_file('export_0.4_no_Nodes.aiida', **core_archive) archive_format = ArchiveFormatSqlZip() new_archive = tmp_path / 'output_file.aiida' @@ -116,10 +117,10 @@ def test_no_node_migration(tmp_path, core_archive): @pytest.mark.parametrize('version', ['0.0', '0.1.0', '0.99']) def test_wrong_versions(core_archive, tmp_path, version): """Test correct errors are raised if archive files have wrong version numbers""" - filepath_archive = get_archive_file('export_v0.4_simple.aiida', **core_archive) + filepath_archive = get_archive_file('export_0.4_simple.aiida', **core_archive) archive_format = ArchiveFormatSqlZip() new_archive = tmp_path / 'out.aiida' - with pytest.raises(ArchiveMigrationError, match='Unknown target version'): + with pytest.raises(StorageMigrationError, match='Unknown target version'): archive_format.migrate(filepath_archive, new_archive, version) assert not new_archive.exists() diff --git a/tests/tools/archive/migration/test_legacy_to_main.py b/tests/tools/archive/migration/test_legacy_to_main.py new file mode 100644 index 0000000000..351ff9823f --- /dev/null +++ b/tests/tools/archive/migration/test_legacy_to_main.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Test archive file migration from legacy format (JSON) to main format (SQLite).""" +import pytest + +from aiida.common.exceptions import StorageMigrationError +from aiida.storage.sqlite_zip.migrator import migrate +from tests.utils.archives import get_archive_file + + +def test_dangling_links(tmp_path): + """Test that links with node UUIDs that are not in the archive are correctly handled.""" + filepath_archive = get_archive_file('0.10_dangling_link.aiida', 'export/migrate') + with pytest.raises(StorageMigrationError, match='Database contains link with unknown input node'): + migrate(filepath_archive, tmp_path / 'archive.aiida', 'main_0001') + + +def test_missing_nodes_in_groups(tmp_path, aiida_caplog): + """Test that groups with listed node UUIDs that are not in the archive are correctly handled.""" + filepath_archive = get_archive_file('0.10_unknown_nodes_in_group.aiida', 'export/migrate') + migrate(filepath_archive, tmp_path / 'archive.aiida', 'main_0001') + assert 'Dropped unknown nodes in groups' in aiida_caplog.text, aiida_caplog.text + + +def test_fields_with_null_values(tmp_path): + """Test that fields with null values are correctly handled.""" + filepath_archive = get_archive_file('0.10_null_fields.aiida', 'export/migrate') + migrate(filepath_archive, tmp_path / 'archive.aiida', 'main_0001') diff --git a/tests/tools/archive/migration/test_v04_to_v05.py b/tests/tools/archive/migration/test_v04_to_v05.py index b22f0e2e48..f394426eff 100644 --- a/tests/tools/archive/migration/test_v04_to_v05.py +++ b/tests/tools/archive/migration/test_v04_to_v05.py @@ -8,7 +8,7 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Test archive file migration from export version 0.4 to 0.5""" -from aiida.tools.archive.implementations.sqlite.migrations.legacy import migrate_v4_to_v5 +from aiida.storage.sqlite_zip.migrations.legacy import migrate_v4_to_v5 def test_migrate_external(migrate_from_func): diff --git a/tests/tools/archive/migration/test_v05_to_v06.py b/tests/tools/archive/migration/test_v05_to_v06.py index 4a6a29342c..7223e53ca2 100644 --- a/tests/tools/archive/migration/test_v05_to_v06.py +++ b/tests/tools/archive/migration/test_v05_to_v06.py @@ -9,8 +9,8 @@ ########################################################################### """Test archive file migration from export version 0.5 to 0.6""" from aiida.storage.psql_dos.migrations.utils.calc_state import STATE_MAPPING -from aiida.tools.archive.implementations.sqlite.migrations.legacy import migrate_v5_to_v6 -from aiida.tools.archive.implementations.sqlite.migrations.utils import verify_metadata_version +from aiida.storage.sqlite_zip.migrations.legacy import migrate_v5_to_v6 +from aiida.storage.sqlite_zip.migrations.utils import verify_metadata_version from tests.utils.archives import get_archive_file, read_json_files @@ -31,7 +31,7 @@ def test_migrate_v5_to_v6_calc_states(core_archive, migrate_from_func): module does not include a `CalcJobNode` with a legacy `state` attribute. 
""" # Get metadata.json and data.json as dicts from v0.5 file archive - archive_path = get_archive_file('export_v0.5_simple.aiida', **core_archive) + archive_path = get_archive_file('export_0.5_simple.aiida', **core_archive) metadata, data = read_json_files(archive_path) # pylint: disable=unbalanced-tuple-unpacking verify_metadata_version(metadata, version='0.5') @@ -45,7 +45,7 @@ def test_migrate_v5_to_v6_calc_states(core_archive, migrate_from_func): calc_jobs[pk] = data['node_attributes'][pk]['state'] # Migrate to v0.6 - metadata, data = migrate_from_func('export_v0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive) + metadata, data = migrate_from_func('export_0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive) verify_metadata_version(metadata, version='0.6') node_attributes = data['node_attributes'] @@ -73,10 +73,10 @@ def test_migrate_v5_to_v6_datetime(core_archive, migrate_from_func): Datetime attributes were serialized into strings, by first converting to UTC and then printing with the format '%Y-%m-%dT%H:%M:%S.%f'. In the database migration, datetimes were serialized *including* timezone information. Here we test that the archive migration correctly reattaches the timezone information. The archive that we are - using `export_v0.5_simple.aiida` contains a node with the attribute "scheduler_lastchecktime". + using `export_0.5_simple.aiida` contains a node with the attribute "scheduler_lastchecktime". """ # Get metadata.json and data.json as dicts from v0.5 file archive - archive_path = get_archive_file('export_v0.5_simple.aiida', **core_archive) + archive_path = get_archive_file('export_0.5_simple.aiida', **core_archive) metadata, data = read_json_files(archive_path) # pylint: disable=unbalanced-tuple-unpacking verify_metadata_version(metadata, version='0.5') @@ -90,7 +90,7 @@ def test_migrate_v5_to_v6_datetime(core_archive, migrate_from_func): assert '+' not in serialized_original, msg # Migrate to v0.6 - metadata, data = migrate_from_func('export_v0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive) + metadata, data = migrate_from_func('export_0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive) verify_metadata_version(metadata, version='0.6') serialized_migrated = data['node_attributes'][key]['scheduler_lastchecktime'] @@ -99,6 +99,6 @@ def test_migrate_v5_to_v6_datetime(core_archive, migrate_from_func): else: raise RuntimeError( - 'the archive `export_v0.5_simple.aiida` did not contain a node with the attribute ' + 'the archive `export_0.5_simple.aiida` did not contain a node with the attribute ' '`scheduler_lastchecktime` which is required for this test.' 
) diff --git a/tests/tools/archive/migration/test_v06_to_v07.py b/tests/tools/archive/migration/test_v06_to_v07.py index 96a80aee31..b4f2e502b0 100644 --- a/tests/tools/archive/migration/test_v06_to_v07.py +++ b/tests/tools/archive/migration/test_v06_to_v07.py @@ -10,8 +10,8 @@ """Test archive file migration from export version 0.6 to 0.7""" import pytest -from aiida.tools.archive.exceptions import CorruptArchive -from aiida.tools.archive.implementations.sqlite.migrations.legacy.v06_to_v07 import ( +from aiida.common.exceptions import CorruptStorage +from aiida.storage.sqlite_zip.migrations.legacy.v06_to_v07 import ( data_migration_legacy_process_attributes, migrate_v6_to_v7, ) @@ -49,7 +49,7 @@ def test_migrate_external(migrate_from_func): def test_migration_0040_corrupt_archive(): - """Check CorruptArchive is raised for different cases during migration 0040""" + """Check CorruptStorage is raised for different cases during migration 0040""" # data has one "valid" entry, in the form of Node . # At least it has the needed key `node_type`. # data also has one "invalid" entry, in form of Node . @@ -73,7 +73,7 @@ def test_migration_0040_corrupt_archive(): } } - with pytest.raises(CorruptArchive, match='Your export archive is corrupt! Org. exception:'): + with pytest.raises(CorruptStorage, match='Your export archive is corrupt! Org. exception:'): data_migration_legacy_process_attributes(data) # data has one "valid" entry, in the form of Node . @@ -101,7 +101,7 @@ def test_migration_0040_corrupt_archive(): } } - with pytest.raises(CorruptArchive, match='Your export archive is corrupt! Please see the log-file'): + with pytest.raises(CorruptStorage, match='Your export archive is corrupt! Please see the log-file'): data_migration_legacy_process_attributes(data) diff --git a/tests/tools/archive/migration/test_v07_to_v08.py b/tests/tools/archive/migration/test_v07_to_v08.py index 34c9f0ece7..5c6dd52109 100644 --- a/tests/tools/archive/migration/test_v07_to_v08.py +++ b/tests/tools/archive/migration/test_v07_to_v08.py @@ -8,10 +8,7 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Test archive file migration from export version 0.7 to 0.8""" -from aiida.tools.archive.implementations.sqlite.migrations.legacy.v07_to_v08 import ( - migrate_v7_to_v8, - migration_default_link_label, -) +from aiida.storage.sqlite_zip.migrations.legacy.v07_to_v08 import migrate_v7_to_v8, migration_default_link_label def test_migrate_external(migrate_from_func): @@ -28,7 +25,7 @@ def test_migrate_external(migrate_from_func): def test_migration_0043_default_link_label(): - """Check CorruptArchive is raised for different cases during migration 0040""" + """Check link labels are migrated properly.""" # data has one "valid" link, in the form of . # data also has one "invalid" link, in form of . 
# After the migration, the "invalid" link should have been updated to the "valid" link diff --git a/tests/tools/archive/migration/test_v08_to_v09.py b/tests/tools/archive/migration/test_v08_to_v09.py index 23c5adb136..46049771f4 100644 --- a/tests/tools/archive/migration/test_v08_to_v09.py +++ b/tests/tools/archive/migration/test_v08_to_v09.py @@ -8,10 +8,7 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Test archive file migration from export version 0.8 to 0.9""" -from aiida.tools.archive.implementations.sqlite.migrations.legacy.v08_to_v09 import ( - migrate_v8_to_v9, - migration_dbgroup_type_string, -) +from aiida.storage.sqlite_zip.migrations.legacy.v08_to_v09 import migrate_v8_to_v9, migration_dbgroup_type_string def test_migrate_external(migrate_from_func): diff --git a/tests/tools/archive/orm/test_links.py b/tests/tools/archive/orm/test_links.py index 242dea30e7..d29dbaac42 100644 --- a/tests/tools/archive/orm/test_links.py +++ b/tests/tools/archive/orm/test_links.py @@ -12,7 +12,6 @@ from aiida.common.links import LinkType from aiida.orm.entities import EntityTypes from aiida.tools.archive import ArchiveFormatSqlZip, create_archive, import_archive -# from aiida.tools.archive.exceptions import DanglingLinkError from tests.tools.archive.utils import get_all_node_links @@ -533,10 +532,10 @@ def test_link_flags(tmp_path, aiida_profile_clean, aiida_localhost_factory): ) ) - link_flags_import_helper(input_links_forward, aiida_profile_clean.reset_db) - link_flags_import_helper(create_return_links_backward, aiida_profile_clean.reset_db) - link_flags_import_helper(call_links_backward_calc1, aiida_profile_clean.reset_db) - link_flags_import_helper(call_links_backward_work2, aiida_profile_clean.reset_db) + link_flags_import_helper(input_links_forward, aiida_profile_clean.clear_profile) + link_flags_import_helper(create_return_links_backward, aiida_profile_clean.clear_profile) + link_flags_import_helper(call_links_backward_calc1, aiida_profile_clean.clear_profile) + link_flags_import_helper(call_links_backward_work2, aiida_profile_clean.clear_profile) def test_double_return_links_for_workflows(tmp_path, aiida_profile_clean): diff --git a/tests/tools/archive/test_backend.py b/tests/tools/archive/test_backend.py index d537c32d09..62a01f83e7 100644 --- a/tests/tools/archive/test_backend.py +++ b/tests/tools/archive/test_backend.py @@ -19,13 +19,13 @@ @pytest.fixture() -def archive(tmp_path): +def archive(): """Yield the archive open in read mode.""" - filepath_archive = get_archive_file('export_v1.0_simple.aiida', filepath='export/migrate') archive_format = ArchiveFormatSqlZip() - new_archive = tmp_path / 'out.aiida' - archive_format.migrate(filepath_archive, new_archive, archive_format.latest_version) - with archive_format.open(new_archive, 'r') as reader: + filepath_archive = get_archive_file( + f'export_{archive_format.latest_version}_simple.aiida', filepath='export/migrate' + ) + with archive_format.open(filepath_archive, 'r') as reader: yield reader diff --git a/tests/tools/archive/test_schema.py b/tests/tools/archive/test_schema.py new file mode 100644 index 0000000000..238f32d191 --- /dev/null +++ b/tests/tools/archive/test_schema.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Test the schema of the sqlite file within the archive.""" +from contextlib import suppress + +from archive_path import extract_file_in_zip +import pytest +from sqlalchemy import String, inspect +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.engine import Inspector +import yaml + +from aiida import get_profile +from aiida.storage.psql_dos.utils import create_sqlalchemy_engine +from aiida.storage.sqlite_zip import models, utils +from aiida.storage.sqlite_zip.migrator import get_schema_version_head, migrate +from tests.utils.archives import get_archive_file + + +@pytest.mark.usefixtures('aiida_profile_clean') +def test_psql_sync_init(tmp_path): + """Test the schema is in-sync with the ``psql_dos`` backend, when initialising a new archive.""" + # note, directly using the global profile's engine here left connections open + with create_sqlalchemy_engine(get_profile().storage_config).connect() as conn: + psql_insp = inspect(conn) + + engine = utils.create_sqla_engine(tmp_path / 'archive.sqlite') + models.SqliteBase.metadata.create_all(engine) + sqlite_insp = inspect(engine) + + diffs = diff_schemas(psql_insp, sqlite_insp) + if diffs: + raise AssertionError(f'Schema is not in-sync with the psql backend:\n{yaml.safe_dump(diffs)}') + + +@pytest.mark.usefixtures('aiida_profile_clean') +def test_psql_sync_migrate(tmp_path): + """Test the schema is in-sync with the ``psql_dos`` backend, when migrating an old archive to the latest version.""" + # note, directly using the global profile's engine here left connections open + with create_sqlalchemy_engine(get_profile().storage_config).connect() as conn: + psql_insp = inspect(conn) + + # migrate an old archive + filepath_archive = get_archive_file('export_0.4_simple.aiida', 'export/migrate') + migrate(filepath_archive, tmp_path / 'archive.aiida', get_schema_version_head()) + + # extract the database + with tmp_path.joinpath('archive.sqlite').open('wb') as handle: + extract_file_in_zip(tmp_path / 'archive.aiida', 'db.sqlite3', handle) + + engine = utils.create_sqla_engine(tmp_path / 'archive.sqlite') + sqlite_insp = inspect(engine) + + diffs = diff_schemas(psql_insp, sqlite_insp) + if diffs: + raise AssertionError(f'Schema is not in-sync with the psql backend:\n{yaml.safe_dump(diffs)}') + + +def diff_schemas(psql_insp: Inspector, sqlite_insp: Inspector): # pylint: disable=too-many-branches,too-many-statements + """Compare the reflected schemas of the two databases.""" + diffs = {} + + for table_name in sqlite_insp.get_table_names(): + if not table_name.startswith('db_') or table_name == 'db_dbsetting': + continue # not an aiida table + if table_name not in psql_insp.get_table_names(): + diffs[table_name] = 'additional' + for table_name in psql_insp.get_table_names(): + if not table_name.startswith('db_') or table_name == 'db_dbsetting': + continue # not an aiida table + if table_name not in sqlite_insp.get_table_names(): + diffs[table_name] = 'missing' + continue + psql_columns = {col['name']: col for col in psql_insp.get_columns(table_name)} + sqlite_columns = {col['name']: col for col in sqlite_insp.get_columns(table_name)} + for column_name in psql_columns: + # check existence + if column_name not in sqlite_columns: + 
diffs.setdefault(table_name, {})[column_name] = 'missing' + continue + # check type + psql_type = psql_columns[column_name]['type'] + sqlite_type = sqlite_columns[column_name]['type'] + # standardise types + with suppress(NotImplementedError): + psql_type = psql_type.as_generic() + with suppress(NotImplementedError): + sqlite_type = sqlite_type.as_generic() + if isinstance(psql_type, UUID): + psql_type = String(length=32) + if not isinstance(sqlite_type, type(psql_type)): + diffs.setdefault(table_name, {}).setdefault(column_name, {})['type'] = f'{sqlite_type} != {psql_type}' + elif isinstance(psql_type, String): + if psql_type.length != sqlite_type.length: + diffs.setdefault(table_name, + {}).setdefault(column_name, + {})['length'] = f'{sqlite_type.length} != {psql_type.length}' + # check nullability + psql_nullable = psql_columns[column_name]['nullable'] + sqlite_nullable = sqlite_columns[column_name]['nullable'] + if psql_nullable != sqlite_nullable: + diffs.setdefault(table_name, {}).setdefault(column_name, + {})['nullable'] = f'{sqlite_nullable} != {psql_nullable}' + + # compare unique constraints + psql_uq_constraints = [c['name'] for c in psql_insp.get_unique_constraints(table_name)] + sqlite_uq_constraints = [c['name'] for c in sqlite_insp.get_unique_constraints(table_name)] + for uq_constraint in psql_uq_constraints: + if uq_constraint not in sqlite_uq_constraints: + diffs.setdefault(table_name, {}).setdefault('uq_constraints', {})[uq_constraint] = 'missing' + for uq_constraint in sqlite_uq_constraints: + if uq_constraint not in psql_uq_constraints: + diffs.setdefault(table_name, {}).setdefault('uq_constraints', {})[uq_constraint] = 'additional' + + # compare foreign key constraints + psql_fk_constraints = [c['name'] for c in psql_insp.get_foreign_keys(table_name)] + sqlite_fk_constraints = [c['name'] for c in sqlite_insp.get_foreign_keys(table_name)] + for fk_constraint in psql_fk_constraints: + if fk_constraint not in sqlite_fk_constraints: + diffs.setdefault(table_name, {}).setdefault('fk_constraints', {})[fk_constraint] = 'missing' + for fk_constraint in sqlite_fk_constraints: + if fk_constraint not in psql_fk_constraints: + diffs.setdefault(table_name, {}).setdefault('fk_constraints', {})[fk_constraint] = 'additional' + + # compare indexes (discarding any postgresql specific ones, e.g. 
varchar_pattern_ops) + psql_indexes = [ + idx['name'] + for idx in psql_insp.get_indexes(table_name) + if not idx['unique'] and not idx['name'].startswith('ix_pat_') + ] + sqlite_indexes = [idx['name'] for idx in sqlite_insp.get_indexes(table_name) if not idx['unique']] + for index in psql_indexes: + if index not in sqlite_indexes: + diffs.setdefault(table_name, {}).setdefault('indexes', {})[index] = 'missing' + for index in sqlite_indexes: + if index not in psql_indexes: + diffs.setdefault(table_name, {}).setdefault('indexes', {})[index] = 'additional' + + return diffs diff --git a/tests/tools/archive/test_simple.py b/tests/tools/archive/test_simple.py index 0f1002ee6e..43a58a1e7c 100644 --- a/tests/tools/archive/test_simple.py +++ b/tests/tools/archive/test_simple.py @@ -13,9 +13,9 @@ from aiida import orm from aiida.common import json -from aiida.common.exceptions import LicensingException +from aiida.common.exceptions import IncompatibleStorageSchema, LicensingException from aiida.common.links import LinkType -from aiida.tools.archive import create_archive, exceptions, import_archive +from aiida.tools.archive import create_archive, import_archive @pytest.mark.parametrize('entities', ['all', 'specific']) @@ -102,7 +102,7 @@ def test_check_for_export_format_version(aiida_profile_clean, tmp_path): # then try to import it aiida_profile_clean.clear_profile() - with pytest.raises(exceptions.IncompatibleArchiveVersionError): + with pytest.raises(IncompatibleStorageSchema): import_archive(filename2) diff --git a/tests/tools/archive/test_common.py b/tests/tools/archive/test_utils.py similarity index 94% rename from tests/tools/archive/test_common.py rename to tests/tools/archive/test_utils.py index 50512737f1..ef9c6ad76a 100644 --- a/tests/tools/archive/test_common.py +++ b/tests/tools/archive/test_utils.py @@ -7,10 +7,10 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Test common functions.""" +"""Test utility functions.""" from archive_path import TarPath, ZipPath -from aiida.tools.archive.implementations.sqlite.common import copy_tar_to_zip, copy_zip_to_zip +from aiida.storage.sqlite_zip.migrations.utils import copy_tar_to_zip, copy_zip_to_zip def test_copy_zip_to_zip(tmp_path): diff --git a/utils/make_all.py b/utils/make_all.py index 9f8cc42174..daee5b9f12 100644 --- a/utils/make_all.py +++ b/utils/make_all.py @@ -157,7 +157,7 @@ def write_inits(folder_path: str, all_dict: dict, skip_children: Dict[str, List[ # skipped since this is for testing only not general use 'manage': ['tests'], # skipped since we don't want to expose the implementation at the top-level - 'storage': ['psql_dos'], + 'storage': ['psql_dos', 'sqlite_zip'], 'orm': ['implementation'], # skip all since the module requires extra requirements 'restapi': ['*'],