diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index aa07afaf39..541518a9c7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -103,6 +103,7 @@ repos:
aiida/storage/psql_dos/backend.py|
aiida/storage/psql_dos/orm/querybuilder/.*py|
aiida/storage/psql_dos/utils.py|
+ aiida/storage/sqlite_zip/.*.py|
aiida/tools/graph/graph_traversers.py|
aiida/tools/groups/paths.py|
aiida/tools/archive/.*py|
diff --git a/aiida/cmdline/commands/cmd_archive.py b/aiida/cmdline/commands/cmd_archive.py
index 2fdb40f933..94728e3012 100644
--- a/aiida/cmdline/commands/cmd_archive.py
+++ b/aiida/cmdline/commands/cmd_archive.py
@@ -11,18 +11,19 @@
"""`verdi archive` command."""
from enum import Enum
import logging
+from pathlib import Path
import traceback
from typing import List, Tuple
import urllib.request
import click
from click_spinner import spinner
-import tabulate
from aiida.cmdline.commands.cmd_verdi import verdi
from aiida.cmdline.params import arguments, options
from aiida.cmdline.params.types import GroupParamType, PathOrUrl
from aiida.cmdline.utils import decorators, echo
+from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, UnreachableStorage
from aiida.common.links import GraphTraversalRules
from aiida.common.log import AIIDA_LOGGER
@@ -36,66 +37,68 @@ def verdi_archive():
"""Create, inspect and import AiiDA archives."""
-@verdi_archive.command('inspect')
+@verdi_archive.command('version')
+@click.argument('path', nargs=1, type=click.Path(exists=True, readable=True))
+def archive_version(path):
+ """Print the current version of an archive's schema."""
+ # note: this mirrors `cmd_storage:storage_version`
+ # it is currently hardcoded to the `SqliteZipBackend`, but could be generalized in the future
+ from aiida.storage.sqlite_zip.backend import SqliteZipBackend
+ storage_cls = SqliteZipBackend
+ profile = storage_cls.create_profile(path)
+ head_version = storage_cls.version_head()
+ try:
+ profile_version = storage_cls.version_profile(profile)
+ except (UnreachableStorage, CorruptStorage) as exc:
+ echo.echo_critical(f'archive file version unreadable: {exc}')
+ echo.echo(f'Latest archive schema version: {head_version!r}')
+ echo.echo(f'Archive schema version of {Path(path).name!r}: {profile_version!r}')
+
+
+@verdi_archive.command('info')
+@click.argument('path', nargs=1, type=click.Path(exists=True, readable=True))
+@click.option('--statistics', is_flag=True, help='Include detailed statistics of the archive contents.')
+def archive_info(path, statistics):
+ """Summarise the contents of an archive."""
+ # note: this mirrors `cmd_storage:storage_info`
+ # it is currently hardcoded to the `SqliteZipBackend`, but could be generalized in the future
+ from aiida.storage.sqlite_zip.backend import SqliteZipBackend
+ try:
+ storage = SqliteZipBackend(SqliteZipBackend.create_profile(path))
+ except (UnreachableStorage, CorruptStorage) as exc:
+ echo.echo_critical(f'archive file unreadable: {exc}')
+ except IncompatibleStorageSchema as exc:
+ echo.echo_critical(f'archive version incompatible: {exc}')
+ with spinner():
+ try:
+ data = storage.get_info(statistics=statistics)
+ finally:
+ storage.close()
+
+ echo.echo_dictionary(data, sort_keys=False, fmt='yaml')
+
+
+@verdi_archive.command('inspect', hidden=True)
@click.argument('archive', nargs=1, type=click.Path(exists=True, readable=True))
@click.option('-v', '--version', is_flag=True, help='Print the archive format version and exit.')
@click.option('-m', '--meta-data', is_flag=True, help='Print the meta data contents and exit.')
@click.option('-d', '--database', is_flag=True, help='Include information on entities in the database.')
-def inspect(archive, version, meta_data, database):
+@decorators.deprecated_command(
+ 'This command has been deprecated and will be removed soon. '
+ 'Please call `verdi archive version` or `verdi archive info` instead.\n'
+)
+@click.pass_context
+def inspect(ctx, archive, version, meta_data, database): # pylint: disable=unused-argument
"""Inspect contents of an archive without importing it.
- By default a summary of the archive contents will be printed.
- The various options can be used to change exactly what information is displayed.
+ .. deprecated:: v2.0.0, use `verdi archive version` or `verdi archive info` instead.
"""
- from aiida.tools.archive.abstract import get_format
- from aiida.tools.archive.exceptions import UnreadableArchiveError
-
- archive_format = get_format()
- latest_version = archive_format.latest_version
- try:
- current_version = archive_format.read_version(archive)
- except UnreadableArchiveError as exc:
- echo.echo_critical(f'archive file of unknown format: {exc}')
-
if version:
- echo.echo(current_version)
- return
-
- if current_version != latest_version:
- echo.echo_critical(
- f"Archive version is not the latest: '{current_version}' != '{latest_version}'. "
- 'Use `verdi migrate` to upgrade to the latest version'
- )
-
- with archive_format.open(archive, 'r') as archive_reader:
- metadata = archive_reader.get_metadata()
-
- if meta_data:
- echo.echo_dictionary(metadata, sort_keys=False)
- return
-
- statistics = {
- name: metadata[key] for key, name in [
- ['export_version', 'Version archive'],
- ['aiida_version', 'Version aiida'],
- ['compression', 'Compression'],
- ['ctime', 'Created'],
- ['mtime', 'Modified'],
- ] if key in metadata
- }
- if 'conversion_info' in metadata:
- statistics['Conversion info'] = '\n'.join(metadata['conversion_info'])
-
- echo.echo(tabulate.tabulate(statistics.items()))
-
- if database:
- echo.echo('')
- echo.echo('Database statistics')
- echo.echo('-------------------')
- with spinner():
- with archive_format.open(archive, 'r') as archive_reader:
- data = archive_reader.get_backend().get_info(statistics=True)
- echo.echo_dictionary(data, sort_keys=False, fmt='yaml')
+ ctx.invoke(archive_version, path=archive)
+ elif database:
+ ctx.invoke(archive_info, path=archive, statistics=True)
+ else:
+ ctx.invoke(archive_info, path=archive, statistics=False)
@verdi_archive.command('create')
@@ -136,7 +139,7 @@ def create(
create_backward, return_backward, call_calc_backward, call_work_backward, include_comments, include_logs,
include_authinfos, compress, batch_size, test_run
):
- """Write subsets of the provenance graph to a single file.
+ """Create an archive from all or part of a profiles's data.
Besides Nodes of the provenance graph, you can archive Groups, Codes, Computers, Comments and Logs.
@@ -214,7 +217,7 @@ def create(
help='Archive format version to migrate to (defaults to latest version).',
)
def migrate(input_file, output_file, force, in_place, version):
- """Migrate an export archive to a more recent format version."""
+ """Migrate an archive to a more recent schema version."""
from aiida.common.progress_reporter import set_progress_bar_tqdm, set_progress_reporter
from aiida.tools.archive.abstract import get_format
@@ -248,7 +251,7 @@ def migrate(input_file, output_file, force, in_place, version):
f'{error.__class__.__name__}:{error}'
)
- echo.echo_success(f'migrated the archive to version {version}')
+ echo.echo_success(f'migrated the archive to version {version!r}')
class ExtrasImportCode(Enum):
@@ -333,7 +336,7 @@ def import_archive(
ctx, archives, webpages, extras_mode_existing, extras_mode_new, comment_mode, include_authinfos, migration,
batch_size, import_group, group, test_run
):
- """Import data from an AiiDA archive file.
+ """Import archived data to a profile.
The archive can be specified by its relative or absolute file path, or its HTTP URL.
"""
@@ -424,12 +427,11 @@ def _import_archive_and_migrate(archive: str, web_based: bool, import_kwargs: di
:param archive: the path or URL to the archive
:param web_based: If the archive needs to be downloaded first
:param import_kwargs: keyword arguments to pass to the import function
- :param try_migration: whether to try a migration if the import raises IncompatibleArchiveVersionError
+ :param try_migration: whether to try a migration if the import raises `IncompatibleStorageSchema`
"""
from aiida.common.folders import SandboxFolder
from aiida.tools.archive.abstract import get_format
- from aiida.tools.archive.exceptions import IncompatibleArchiveVersionError
from aiida.tools.archive.imports import import_archive as _import_archive
archive_format = get_format()
@@ -452,7 +454,7 @@ def _import_archive_and_migrate(archive: str, web_based: bool, import_kwargs: di
echo.echo_report(f'starting import: {archive}')
try:
_import_archive(archive_path, archive_format=archive_format, **import_kwargs)
- except IncompatibleArchiveVersionError as exception:
+ except IncompatibleStorageSchema as exception:
if try_migration:
echo.echo_report(f'incompatible version detected for {archive}, trying migration')
diff --git a/aiida/manage/configuration/profile.py b/aiida/manage/configuration/profile.py
index fc5e9d96b4..a808efc668 100644
--- a/aiida/manage/configuration/profile.py
+++ b/aiida/manage/configuration/profile.py
@@ -127,9 +127,9 @@ def storage_cls(self) -> Type['StorageBackend']:
if self.storage_backend == 'psql_dos':
from aiida.storage.psql_dos.backend import PsqlDosBackend
return PsqlDosBackend
- if self.storage_backend == 'archive.sqlite':
- from aiida.tools.archive.implementations.sqlite.backend import ArchiveReadOnlyBackend
- return ArchiveReadOnlyBackend
+ if self.storage_backend == 'sqlite_zip':
+ from aiida.storage.sqlite_zip.backend import SqliteZipBackend
+ return SqliteZipBackend
raise ValueError(f'unknown storage backend type: {self.storage_backend}')
@property
diff --git a/aiida/storage/log.py b/aiida/storage/log.py
index 11ef376b36..24a037f442 100644
--- a/aiida/storage/log.py
+++ b/aiida/storage/log.py
@@ -12,3 +12,4 @@
from aiida.common.log import AIIDA_LOGGER
STORAGE_LOGGER = AIIDA_LOGGER.getChild('storage')
+MIGRATE_LOGGER = STORAGE_LOGGER.getChild('migrate')
diff --git a/aiida/storage/psql_dos/__init__.py b/aiida/storage/psql_dos/__init__.py
index eac0048fe9..8bea8e1e03 100644
--- a/aiida/storage/psql_dos/__init__.py
+++ b/aiida/storage/psql_dos/__init__.py
@@ -7,7 +7,7 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
-"""Module with implementation of the storage backend using SqlAlchemy and the disk-objectstore."""
+"""Module with implementation of the storage backend using PostGreSQL and the disk-objectstore."""
# AUTO-GENERATED
diff --git a/aiida/storage/psql_dos/backend.py b/aiida/storage/psql_dos/backend.py
index 7f0fe3d59f..683484845a 100644
--- a/aiida/storage/psql_dos/backend.py
+++ b/aiida/storage/psql_dos/backend.py
@@ -55,7 +55,7 @@ def version_head(cls) -> str:
return cls.migrator.get_schema_version_head()
@classmethod
- def version_profile(cls, profile: Profile) -> None:
+ def version_profile(cls, profile: Profile) -> Optional[str]:
return cls.migrator(profile).get_schema_version_profile(check_legacy=True)
@classmethod
diff --git a/aiida/storage/psql_dos/migrations/env.py b/aiida/storage/psql_dos/migrations/env.py
index 613d237c34..aacf26e98d 100644
--- a/aiida/storage/psql_dos/migrations/env.py
+++ b/aiida/storage/psql_dos/migrations/env.py
@@ -16,18 +16,8 @@ def run_migrations_online():
The connection should have been passed to the config, which we use to configue the migration context.
"""
+ from aiida.storage.psql_dos.models.base import get_orm_metadata
- # pylint: disable=unused-import
- from aiida.common.exceptions import DbContentError
- from aiida.storage.psql_dos.models.authinfo import DbAuthInfo
- from aiida.storage.psql_dos.models.base import Base
- from aiida.storage.psql_dos.models.comment import DbComment
- from aiida.storage.psql_dos.models.computer import DbComputer
- from aiida.storage.psql_dos.models.group import DbGroup
- from aiida.storage.psql_dos.models.log import DbLog
- from aiida.storage.psql_dos.models.node import DbLink, DbNode
- from aiida.storage.psql_dos.models.settings import DbSetting
- from aiida.storage.psql_dos.models.user import DbUser
config = context.config # pylint: disable=no-member
connection = config.attributes.get('connection', None)
@@ -43,7 +33,7 @@ def run_migrations_online():
context.configure( # pylint: disable=no-member
connection=connection,
- target_metadata=Base.metadata,
+ target_metadata=get_orm_metadata(),
transaction_per_migration=True,
aiida_profile=aiida_profile,
on_version_apply=on_version_apply
diff --git a/aiida/storage/psql_dos/migrator.py b/aiida/storage/psql_dos/migrator.py
index ef97683548..fc85d30bf8 100644
--- a/aiida/storage/psql_dos/migrator.py
+++ b/aiida/storage/psql_dos/migrator.py
@@ -33,6 +33,7 @@
from aiida.common import exceptions
from aiida.manage.configuration.profile import Profile
+from aiida.storage.log import MIGRATE_LOGGER
from aiida.storage.psql_dos.models.settings import DbSetting
from aiida.storage.psql_dos.utils import create_sqlalchemy_engine
@@ -197,8 +198,6 @@ def migrate(self) -> None:
:raises: :class:`~aiida.common.exceptions.UnreachableStorage` if the storage cannot be accessed
"""
- from aiida.cmdline.utils import echo
-
# the database can be in one of a few states:
# 1. Completely empty -> we can simply initialise it with the current ORM schema
# 2. Legacy django database -> we transfer the version to alembic, migrate to the head of the django branch,
@@ -211,7 +210,7 @@ def migrate(self) -> None:
if not inspect(connection).has_table(self.alembic_version_tbl_name):
if not inspect(connection).has_table(self.django_version_table.name):
# the database is assumed to be empty, so we need to initialise it
- echo.echo_report('initialising empty storage schema')
+ MIGRATE_LOGGER.report('initialising empty storage schema')
self.initialise()
return
# the database is a legacy django one,
@@ -238,10 +237,10 @@ def migrate(self) -> None:
if 'django' in branches or 'sqlalchemy' in branches:
# migrate up to the top of the respective legacy branches
if 'django' in branches:
- echo.echo_report('Migrating to the head of the legacy django branch')
+ MIGRATE_LOGGER.report('Migrating to the head of the legacy django branch')
self.migrate_up('django@head')
elif 'sqlalchemy' in branches:
- echo.echo_report('Migrating to the head of the legacy sqlalchemy branch')
+ MIGRATE_LOGGER.report('Migrating to the head of the legacy sqlalchemy branch')
self.migrate_up('sqlalchemy@head')
# now re-stamp with the comparable revision on the main branch
with self._connection_context() as connection:
@@ -251,7 +250,7 @@ def migrate(self) -> None:
connection.commit()
# finally migrate to the main head revision
- echo.echo_report('Migrating to the head of the main branch')
+ MIGRATE_LOGGER.report('Migrating to the head of the main branch')
self.migrate_up('main@head')
def migrate_up(self, version: str) -> None:
@@ -284,7 +283,7 @@ def _alembic_script(cls):
return ScriptDirectory.from_config(cls._alembic_config())
@contextlib.contextmanager
- def _alembic_connect(self, _connection: Optional[Connection] = None):
+ def _alembic_connect(self, _connection: Optional[Connection] = None) -> Iterator[Config]:
"""Context manager to return an instance of an Alembic configuration.
The profiles's database connection is added in the `attributes` property, through which it can then also be
@@ -297,16 +296,15 @@ def _alembic_connect(self, _connection: Optional[Connection] = None):
def _callback(step: MigrationInfo, **kwargs): # pylint: disable=unused-argument
"""Callback to be called after a migration step is executed."""
- from aiida.cmdline.utils import echo
from_rev = step.down_revision_ids[0] if step.down_revision_ids else ''
- echo.echo_report(f'- {from_rev} -> {step.up_revision_id}')
+ MIGRATE_LOGGER.report(f'- {from_rev} -> {step.up_revision_id}')
config.attributes['on_version_apply'] = _callback # pylint: disable=unsupported-assignment-operation
yield config
@contextlib.contextmanager
- def _migration_context(self, _connection: Optional[Connection] = None) -> MigrationContext:
+ def _migration_context(self, _connection: Optional[Connection] = None) -> Iterator[MigrationContext]:
"""Context manager to return an instance of an Alembic migration context.
This migration context will have been configured with the current database connection, which allows this context
diff --git a/aiida/storage/sqlite_zip/__init__.py b/aiida/storage/sqlite_zip/__init__.py
new file mode 100644
index 0000000000..d79b5e11c6
--- /dev/null
+++ b/aiida/storage/sqlite_zip/__init__.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""Module with implementation of the storage backend,
+using an SQLite database and repository files, within a zipfile.
+
+The content of the zip file is::
+
+ |- storage.zip
+ |- metadata.json
+ |- db.sqlite3
+ |- repo/
+ |- hashkey1
+ |- hashkey2
+ ...
+
+For quick access, the metadata (such as the version) is stored in a `metadata.json` file
+at the "top" of the zip file, with the SQLite database just below it, followed by the repository files.
+Repository files are named by their SHA256 content hash.
+
+This storage backend is primarily intended as a read-only format for the AiiDA archive,
+since SQLite and zip files are not suitable for concurrent write access.
+
+The archive format originally used a JSON file to store the database,
+and those legacy revisions are handled by the `version_profile` and `migrate` backend methods.
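+
+A minimal usage sketch (`example.aiida` is a placeholder path to an existing, up-to-date archive)::
+
+    from aiida.storage.sqlite_zip.backend import SqliteZipBackend
+
+    profile = SqliteZipBackend.create_profile('example.aiida')
+    backend = SqliteZipBackend(profile)
+    try:
+        print(backend.get_info())
+    finally:
+        backend.close()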
+"""
diff --git a/aiida/storage/sqlite_zip/backend.py b/aiida/storage/sqlite_zip/backend.py
new file mode 100644
index 0000000000..ff931cdb9d
--- /dev/null
+++ b/aiida/storage/sqlite_zip/backend.py
@@ -0,0 +1,485 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""The table models are dynamically generated from the sqlalchemy backend models."""
+from __future__ import annotations
+
+from contextlib import contextmanager
+from functools import singledispatch
+from pathlib import Path
+import tempfile
+from typing import BinaryIO, Iterable, Iterator, Optional, Sequence, Tuple, Type, cast
+from zipfile import ZipFile, is_zipfile
+
+from archive_path import extract_file_in_zip
+from sqlalchemy.orm import Session
+
+from aiida.common.exceptions import AiidaException, ClosedStorage, CorruptStorage
+from aiida.manage import Profile
+from aiida.orm.entities import EntityTypes
+from aiida.orm.implementation import StorageBackend
+from aiida.repository.backend.abstract import AbstractRepositoryBackend
+from aiida.storage.psql_dos.orm import authinfos, comments, computers, entities, groups, logs, nodes, users
+from aiida.storage.psql_dos.orm.querybuilder import SqlaQueryBuilder
+from aiida.storage.psql_dos.orm.utils import ModelWrapper
+
+from . import models
+from .migrator import get_schema_version_head, validate_storage
+from .utils import DB_FILENAME, REPO_FOLDER, create_sqla_engine, extract_metadata, read_version
+
+
+class SqliteZipBackend(StorageBackend): # pylint: disable=too-many-public-methods
+ """A read-only backend for a sqlite/zip format.
+
+ The storage format uses an SQLite database and repository files, within a folder or zipfile.
+
+ The content of the folder/zipfile should be::
+
+ |- metadata.json
+ |- db.sqlite3
+ |- repo/
+ |- hashkey1
+ |- hashkey2
+ ...
+
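+    An illustrative sketch for reading a single repository object
+    (`example.aiida` and `some_key` are placeholders)::
+
+        backend = SqliteZipBackend(SqliteZipBackend.create_profile('example.aiida'))
+        repository = backend.get_repository()
+        with repository.open('some_key') as handle:
+            content = handle.read()
+        backend.close()
+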
+ """
+
+ @classmethod
+ def version_head(cls) -> str:
+ return get_schema_version_head()
+
+ @staticmethod
+ def create_profile(path: str | Path) -> Profile:
+ """Create a new profile instance for this backend, from the path to the zip file."""
+ profile_name = Path(path).name
+ return Profile(
+ profile_name, {
+ 'storage': {
+ 'backend': 'sqlite_zip',
+ 'config': {
+ 'path': str(path)
+ }
+ },
+ 'process_control': {
+ 'backend': 'null',
+ 'config': {}
+ }
+ }
+ )
+
+ @classmethod
+ def version_profile(cls, profile: Profile) -> Optional[str]:
+ return read_version(profile.storage_config['path'], search_limit=None)
+
+ @classmethod
+ def migrate(cls, profile: Profile):
+ raise NotImplementedError('use the migrate function directly.')
+
+ def __init__(self, profile: Profile):
+ super().__init__(profile)
+ self._path = Path(profile.storage_config['path'])
+ validate_storage(self._path)
+ # lazy open the archive zipfile and extract the database file
+ self._db_file: Optional[Path] = None
+ self._session: Optional[Session] = None
+ self._repo: Optional[_RoBackendRepository] = None
+ self._closed = False
+
+ def __str__(self) -> str:
+ state = 'closed' if self.is_closed else 'open'
+ return f'SqliteZip storage (read-only) [{state}] @ {self._path}'
+
+ @property
+ def is_closed(self) -> bool:
+ return self._closed
+
+ def close(self):
+ """Close the backend"""
+ if self._session:
+ self._session.close()
+ if self._db_file and self._db_file.exists():
+ self._db_file.unlink()
+ if self._repo:
+ self._repo.close()
+ self._session = None
+ self._db_file = None
+ self._repo = None
+ self._closed = True
+
+ def get_session(self) -> Session:
+ """Return an SQLAlchemy session."""
+ if self._closed:
+ raise ClosedStorage(str(self))
+ if self._session is None:
+ if is_zipfile(self._path):
+ _, path = tempfile.mkstemp()
+ db_file = self._db_file = Path(path)
+ with db_file.open('wb') as handle:
+ try:
+ extract_file_in_zip(self._path, DB_FILENAME, handle, search_limit=4)
+ except Exception as exc:
+ raise CorruptStorage(f'database could not be read: {exc}') from exc
+ else:
+ db_file = self._path / DB_FILENAME
+ if not db_file.exists():
+ raise CorruptStorage(f'database could not be read: non-existent {db_file}')
+ self._session = Session(create_sqla_engine(db_file))
+ return self._session
+
+ def get_repository(self) -> '_RoBackendRepository':
+ if self._closed:
+ raise ClosedStorage(str(self))
+ if self._repo is None:
+ if is_zipfile(self._path):
+ self._repo = ZipfileBackendRepository(self._path)
+ elif (self._path / REPO_FOLDER).exists():
+ self._repo = FolderBackendRepository(self._path / REPO_FOLDER)
+ else:
+ raise CorruptStorage(f'repository could not be read: non-existent {self._path / REPO_FOLDER}')
+ return self._repo
+
+ def query(self) -> 'SqliteBackendQueryBuilder':
+ return SqliteBackendQueryBuilder(self)
+
+ def get_backend_entity(self, res): # pylint: disable=no-self-use
+ """Return the backend entity that corresponds to the given Model instance."""
+ klass = get_backend_entity(res)
+ return klass(self, res)
+
+ @property
+ def authinfos(self):
+ return create_backend_collection(
+ authinfos.SqlaAuthInfoCollection, self, authinfos.SqlaAuthInfo, models.DbAuthInfo
+ )
+
+ @property
+ def comments(self):
+ return create_backend_collection(comments.SqlaCommentCollection, self, comments.SqlaComment, models.DbComment)
+
+ @property
+ def computers(self):
+ return create_backend_collection(
+ computers.SqlaComputerCollection, self, computers.SqlaComputer, models.DbComputer
+ )
+
+ @property
+ def groups(self):
+ return create_backend_collection(groups.SqlaGroupCollection, self, groups.SqlaGroup, models.DbGroup)
+
+ @property
+ def logs(self):
+ return create_backend_collection(logs.SqlaLogCollection, self, logs.SqlaLog, models.DbLog)
+
+ @property
+ def nodes(self):
+ return create_backend_collection(nodes.SqlaNodeCollection, self, nodes.SqlaNode, models.DbNode)
+
+ @property
+ def users(self):
+ return create_backend_collection(users.SqlaUserCollection, self, users.SqlaUser, models.DbUser)
+
+ def _clear(self, recreate_user: bool = True) -> None:
+ raise ReadOnlyError()
+
+ def transaction(self):
+ raise ReadOnlyError()
+
+ @property
+ def in_transaction(self) -> bool:
+ return False
+
+ def bulk_insert(self, entity_type: EntityTypes, rows: list[dict], allow_defaults: bool = False) -> list[int]:
+ raise ReadOnlyError()
+
+ def bulk_update(self, entity_type: EntityTypes, rows: list[dict]) -> None:
+ raise ReadOnlyError()
+
+ def delete_nodes_and_connections(self, pks_to_delete: Sequence[int]):
+ raise ReadOnlyError()
+
+ def get_global_variable(self, key: str):
+ raise NotImplementedError
+
+ def set_global_variable(self, key: str, value, description: Optional[str] = None, overwrite=True) -> None:
+ raise ReadOnlyError()
+
+ def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+ raise NotImplementedError
+
+ def get_info(self, statistics: bool = False) -> dict:
+ # since extracting the database file is expensive, we only do it if statistics is True
+ results = {'metadata': extract_metadata(self._path)}
+ if statistics:
+ results.update(super().get_info(statistics=statistics))
+ results['repository'] = self.get_repository().get_info(statistics)
+ return results
+
+
+class ReadOnlyError(AiidaException):
+ """Raised when a write operation is called on a read-only archive."""
+
+ def __init__(self, msg='sqlite_zip storage is read-only'): # pylint: disable=useless-super-delegation
+ super().__init__(msg)
+
+
+class _RoBackendRepository(AbstractRepositoryBackend): # pylint: disable=abstract-method
+ """A backend abstract for a read-only folder or zip file."""
+
+ def __init__(self, path: str | Path):
+ """Initialise the repository backend.
+
+        :param path: the path to the folder or zip file
+ """
+ self._path = Path(path)
+ self._closed = False
+
+ def close(self) -> None:
+ """Close the repository."""
+ self._closed = True
+
+ @property
+ def uuid(self) -> Optional[str]:
+ return None
+
+ @property
+ def key_format(self) -> Optional[str]:
+ return 'sha256'
+
+ def initialise(self, **kwargs) -> None:
+ pass
+
+ @property
+ def is_initialised(self) -> bool:
+ return True
+
+ def erase(self) -> None:
+ raise ReadOnlyError()
+
+ def _put_object_from_filelike(self, handle: BinaryIO) -> str:
+ raise ReadOnlyError()
+
+ def has_objects(self, keys: list[str]) -> list[bool]:
+ return [self.has_object(key) for key in keys]
+
+ def iter_object_streams(self, keys: list[str]) -> Iterator[Tuple[str, BinaryIO]]:
+ for key in keys:
+ with self.open(key) as handle: # pylint: disable=not-context-manager
+ yield key, handle
+
+ def delete_objects(self, keys: list[str]) -> None:
+ raise ReadOnlyError()
+
+ def get_object_hash(self, key: str) -> str:
+ return key
+
+ def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+ pass
+
+ def get_info(self, statistics: bool = False, **kwargs) -> dict:
+ return {'objects': {'count': len(list(self.list_objects()))}}
+
+
+class ZipfileBackendRepository(_RoBackendRepository):
+ """A read-only backend for a zip file.
+
+    The zip file should contain repository files with the key format: ``<folder>/<sha256 hash>``,
+    i.e. files named by the sha256 hash of the file contents, inside a ``<folder>`` directory.
+ """
+
+ def __init__(self, path: str | Path):
+ super().__init__(path)
+ self._folder = REPO_FOLDER
+ self.__zipfile: None | ZipFile = None
+
+ def close(self) -> None:
+ if self._zipfile:
+ self._zipfile.close()
+ super().close()
+
+ @property
+ def _zipfile(self) -> ZipFile:
+ """Return the open zip file."""
+ if self._closed:
+ raise ClosedStorage(f'repository is closed: {self._path}')
+ if self.__zipfile is None:
+ try:
+ self.__zipfile = ZipFile(self._path, mode='r') # pylint: disable=consider-using-with
+ except Exception as exc:
+ raise CorruptStorage(f'repository could not be read {self._path}: {exc}') from exc
+ return self.__zipfile
+
+ def has_object(self, key: str) -> bool:
+ try:
+ self._zipfile.getinfo(f'{self._folder}/{key}')
+ except KeyError:
+ return False
+ return True
+
+ def list_objects(self) -> Iterable[str]:
+ prefix = f'{self._folder}/'
+ prefix_len = len(prefix)
+ for name in self._zipfile.namelist():
+ if name.startswith(prefix) and name[prefix_len:]:
+ yield name[prefix_len:]
+
+    @contextmanager
+    def open(self, key: str) -> Iterator[BinaryIO]:
+        # open the member before entering the cleanup block, so a missing key
+        # cannot leave `handle` unbound when the `finally` clause runs
+        try:
+            handle = self._zipfile.open(f'{self._folder}/{key}')
+        except KeyError:
+            raise FileNotFoundError(f'object with key `{key}` does not exist.')
+        try:
+            yield cast(BinaryIO, handle)
+        finally:
+            handle.close()
+
+
+class FolderBackendRepository(_RoBackendRepository):
+ """A read-only backend for a folder.
+
+ The folder should contain repository files, named by the sha256 hash of the file contents.
+ """
+
+ def has_object(self, key: str) -> bool:
+ return self._path.joinpath(key).is_file()
+
+ def list_objects(self) -> Iterable[str]:
+ for subpath in self._path.iterdir():
+ if subpath.is_file():
+ yield subpath.name
+
+ @contextmanager
+ def open(self, key: str) -> Iterator[BinaryIO]:
+ if not self._path.joinpath(key).is_file():
+ raise FileNotFoundError(f'object with key `{key}` does not exist.')
+ with self._path.joinpath(key).open('rb') as handle:
+ yield handle
+
+
+class SqliteBackendQueryBuilder(SqlaQueryBuilder):
+ """Archive query builder"""
+
+ @property
+ def Node(self):
+ return models.DbNode
+
+ @property
+ def Link(self):
+ return models.DbLink
+
+ @property
+ def Computer(self):
+ return models.DbComputer
+
+ @property
+ def User(self):
+ return models.DbUser
+
+ @property
+ def Group(self):
+ return models.DbGroup
+
+ @property
+ def AuthInfo(self):
+ return models.DbAuthInfo
+
+ @property
+ def Comment(self):
+ return models.DbComment
+
+ @property
+ def Log(self):
+ return models.DbLog
+
+ @property
+ def table_groups_nodes(self):
+ return models.DbGroupNodes.__table__ # type: ignore[attr-defined] # pylint: disable=no-member
+
+
+def create_backend_cls(base_class, model_cls):
+ """Create an archive backend class for the given model class."""
+
+ class ReadOnlyEntityBackend(base_class): # type: ignore
+ """Backend class for the read-only archive."""
+
+ MODEL_CLASS = model_cls
+
+ def __init__(self, _backend, model):
+ """Initialise the backend entity."""
+ self._backend = _backend
+ self._model = ModelWrapper(model, _backend)
+
+ @property
+ def model(self) -> ModelWrapper:
+ """Return an ORM model that correctly updates and flushes the data model when getting or setting a field."""
+ return self._model
+
+ @property
+ def bare_model(self):
+ """Return the underlying SQLAlchemy ORM model for this entity."""
+ return self.model._model # pylint: disable=protected-access
+
+ @classmethod
+ def from_dbmodel(cls, model, _backend):
+ return cls(_backend, model)
+
+ @property
+ def is_stored(self):
+ return True
+
+ def store(self): # pylint: disable=no-self-use
+ raise ReadOnlyError()
+
+ return ReadOnlyEntityBackend
+
+
+def create_backend_collection(cls, _backend, entity_cls, model):
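+    """Create a collection for the read-only backend, whose entity class wraps ``model`` via ``entity_cls``."""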
+ collection = cls(_backend)
+ new_cls = create_backend_cls(entity_cls, model)
+ collection.ENTITY_CLASS = new_cls
+ return collection
+
+
+@singledispatch
+def get_backend_entity(dbmodel) -> Type[entities.SqlaModelEntity]: # pylint: disable=unused-argument
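+    """Return the read-only backend entity class that corresponds to the given database model instance."""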
+ raise TypeError(f'Cannot get backend entity for {dbmodel}')
+
+
+@get_backend_entity.register(models.DbAuthInfo) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(authinfos.SqlaAuthInfo, dbmodel.__class__)
+
+
+@get_backend_entity.register(models.DbComment) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(comments.SqlaComment, dbmodel.__class__)
+
+
+@get_backend_entity.register(models.DbComputer) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(computers.SqlaComputer, dbmodel.__class__)
+
+
+@get_backend_entity.register(models.DbGroup) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(groups.SqlaGroup, dbmodel.__class__)
+
+
+@get_backend_entity.register(models.DbLog) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(logs.SqlaLog, dbmodel.__class__)
+
+
+@get_backend_entity.register(models.DbNode) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(nodes.SqlaNode, dbmodel.__class__)
+
+
+@get_backend_entity.register(models.DbUser) # type: ignore[call-overload]
+def _(dbmodel):
+ return create_backend_cls(users.SqlaUser, dbmodel.__class__)
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/__init__.py b/aiida/storage/sqlite_zip/migrations/__init__.py
similarity index 90%
rename from aiida/tools/archive/implementations/sqlite/migrations/__init__.py
rename to aiida/storage/sqlite_zip/migrations/__init__.py
index 84dbe1264d..2776a55f97 100644
--- a/aiida/tools/archive/implementations/sqlite/migrations/__init__.py
+++ b/aiida/storage/sqlite_zip/migrations/__init__.py
@@ -7,4 +7,3 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
-"""Migration archive files from old export versions to newer ones."""
diff --git a/aiida/storage/sqlite_zip/migrations/env.py b/aiida/storage/sqlite_zip/migrations/env.py
new file mode 100644
index 0000000000..2ee03a00b2
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/env.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""Upper level SQLAlchemy migration funcitons."""
+from alembic import context
+
+
+def run_migrations_online():
+ """Run migrations in 'online' mode.
+
+    The connection should have been passed to the config, which we use to configure the migration context.
+ """
+ from aiida.storage.sqlite_zip.models import SqliteBase
+
+ config = context.config # pylint: disable=no-member
+
+ connection = config.attributes.get('connection', None)
+ aiida_profile = config.attributes.get('aiida_profile', None)
+ on_version_apply = config.attributes.get('on_version_apply', None)
+
+ if connection is None:
+ from aiida.common.exceptions import ConfigurationError
+ raise ConfigurationError('An initialized connection is expected for the AiiDA online migrations.')
+
+ context.configure( # pylint: disable=no-member
+ connection=connection,
+ target_metadata=SqliteBase.metadata,
+ transaction_per_migration=True,
+ aiida_profile=aiida_profile,
+ on_version_apply=on_version_apply
+ )
+
+ context.run_migrations() # pylint: disable=no-member
+
+
+try:
+ if context.is_offline_mode(): # pylint: disable=no-member
+        raise NotImplementedError('This feature is not currently supported.')
+
+ run_migrations_online()
+except NameError:
+ # This will occur in an environment that is just compiling the documentation
+ pass
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/__init__.py b/aiida/storage/sqlite_zip/migrations/legacy/__init__.py
similarity index 94%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/__init__.py
rename to aiida/storage/sqlite_zip/migrations/legacy/__init__.py
index 5190ad4d96..f46a36c0bd 100644
--- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/__init__.py
+++ b/aiida/storage/sqlite_zip/migrations/legacy/__init__.py
@@ -24,8 +24,7 @@
from .v11_to_v12 import migrate_v11_to_v12
# version from -> version to, function which modifies metadata, data in-place
-_vtype = Dict[str, Tuple[str, Callable[[dict, dict], None]]]
-LEGACY_MIGRATE_FUNCTIONS: _vtype = {
+LEGACY_MIGRATE_FUNCTIONS: Dict[str, Tuple[str, Callable[[dict, dict], None]]] = {
'0.4': ('0.5', migrate_v4_to_v5),
'0.5': ('0.6', migrate_v5_to_v6),
'0.6': ('0.7', migrate_v6_to_v7),
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v04_to_v05.py b/aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py
similarity index 75%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v04_to_v05.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py
index 2e872db20f..17402b4e85 100644
--- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v04_to_v05.py
+++ b/aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py
@@ -24,7 +24,27 @@
Where id is a SQLA id and migration-name is the name of the particular migration.
"""
# pylint: disable=invalid-name
-from ..utils import remove_fields, update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
+from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
+
+
+def remove_fields(metadata, data, entities, fields):
+ """Remove fields under entities from data.json and metadata.json.
+
+ :param metadata: the content of an export archive metadata.json file
+ :param data: the content of an export archive data.json file
+ :param entities: list of ORM entities
+ :param fields: list of fields to be removed from the export archive files
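+
+    For example, ``remove_fields(metadata, data, ['Node'], ['nodeversion', 'public'])``
+    strips those two fields from every exported ``Node`` row and from ``all_fields_info``.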
+ """
+ # data.json
+ for entity in entities:
+ for content in data['export_data'].get(entity, {}).values():
+ for field in fields:
+ content.pop(field, None)
+
+ # metadata.json
+ for entity in entities:
+ for field in fields:
+ metadata['all_fields_info'][entity].pop(field, None)
def migration_drop_node_columns_nodeversion_public(metadata, data):
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v05_to_v06.py b/aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py
similarity index 98%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v05_to_v06.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py
index 3f6a7ea9c5..934c03d4c7 100644
--- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v05_to_v06.py
+++ b/aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py
@@ -31,7 +31,7 @@
def migrate_deserialized_datetime(data, conversion):
"""Deserialize datetime strings from export archives, meaning to reattach the UTC timezone information."""
- from aiida.tools.archive.exceptions import ArchiveMigrationError
+ from aiida.common.exceptions import StorageMigrationError
ret_data: Union[str, dict, list]
@@ -62,7 +62,7 @@ def migrate_deserialized_datetime(data, conversion):
# Since we know that all strings will be UTC, here we are simply reattaching that information.
ret_data = f'{data}+00:00'
else:
- raise ArchiveMigrationError(f"Unknown convert_type '{conversion}'")
+ raise StorageMigrationError(f"Unknown convert_type '{conversion}'")
return ret_data
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v06_to_v07.py b/aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py
similarity index 92%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v06_to_v07.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py
index 4b764140f6..c76d2f8e0c 100644
--- a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v06_to_v07.py
+++ b/aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py
@@ -46,14 +46,14 @@ def data_migration_legacy_process_attributes(data):
`process_state` attribute. If they have it, it is checked whether the state is active or not, if not, the `sealed`
attribute is created and set to `True`.
- :raises `~aiida.tools.archive.exceptions.CorruptArchive`: if a Node, found to have attributes,
+ :raises `~aiida.common.exceptions.CorruptStorage`: if a Node, found to have attributes,
cannot be found in the list of exported entities.
- :raises `~aiida.tools.archive.exceptions.CorruptArchive`: if the 'sealed' attribute does not exist and
+ :raises `~aiida.common.exceptions.CorruptStorage`: if the 'sealed' attribute does not exist and
the ProcessNode is in an active state, i.e. `process_state` is one of ('created', 'running', 'waiting').
A log-file, listing all illegal ProcessNodes, will be produced in the current directory.
"""
+ from aiida.common.exceptions import CorruptStorage
from aiida.storage.psql_dos.migrations.utils.integrity import write_database_integrity_violation
- from aiida.tools.archive.exceptions import CorruptArchive
attrs_to_remove = ['_sealed', '_finished', '_failed', '_aborted', '_do_abort']
active_states = {'created', 'running', 'waiting'}
@@ -68,7 +68,7 @@ def data_migration_legacy_process_attributes(data):
if process_state in active_states:
# The ProcessNode is in an active state, and should therefore never have been allowed
# to be exported. The Node will be added to a log that is saved in the working directory,
- # then a CorruptArchive will be raised, since the archive needs to be migrated manually.
+ # then a CorruptStorage will be raised, since the archive needs to be migrated manually.
uuid_pk = data['export_data']['Node'][node_pk].get('uuid', node_pk)
illegal_cases.append([uuid_pk, process_state])
continue # No reason to do more now
@@ -81,7 +81,7 @@ def data_migration_legacy_process_attributes(data):
for attr in attrs_to_remove:
content.pop(attr, None)
except KeyError as exc:
- raise CorruptArchive(f'Your export archive is corrupt! Org. exception: {exc}')
+ raise CorruptStorage(f'Your export archive is corrupt! Org. exception: {exc}')
if illegal_cases:
headers = ['UUID/PK', 'process_state']
@@ -89,7 +89,7 @@ def data_migration_legacy_process_attributes(data):
'that should never have been allowed to be exported.'
write_database_integrity_violation(illegal_cases, headers, warning_message)
- raise CorruptArchive(
+ raise CorruptStorage(
'Your export archive is corrupt! '
'Please see the log-file in your current directory for more details.'
)
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v07_to_v08.py b/aiida/storage/sqlite_zip/migrations/legacy/v07_to_v08.py
similarity index 100%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v07_to_v08.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v07_to_v08.py
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v08_to_v09.py b/aiida/storage/sqlite_zip/migrations/legacy/v08_to_v09.py
similarity index 100%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v08_to_v09.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v08_to_v09.py
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v09_to_v10.py b/aiida/storage/sqlite_zip/migrations/legacy/v09_to_v10.py
similarity index 100%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v09_to_v10.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v09_to_v10.py
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v10_to_v11.py b/aiida/storage/sqlite_zip/migrations/legacy/v10_to_v11.py
similarity index 100%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v10_to_v11.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v10_to_v11.py
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy/v11_to_v12.py b/aiida/storage/sqlite_zip/migrations/legacy/v11_to_v12.py
similarity index 100%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy/v11_to_v12.py
rename to aiida/storage/sqlite_zip/migrations/legacy/v11_to_v12.py
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/legacy_to_new.py b/aiida/storage/sqlite_zip/migrations/legacy_to_main.py
similarity index 62%
rename from aiida/tools/archive/implementations/sqlite/migrations/legacy_to_new.py
rename to aiida/storage/sqlite_zip/migrations/legacy_to_main.py
index c770e9f233..27566bccc1 100644
--- a/aiida/tools/archive/implementations/sqlite/migrations/legacy_to_new.py
+++ b/aiida/storage/sqlite_zip/migrations/legacy_to_main.py
@@ -7,11 +7,10 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
-"""Migration from legacy JSON format."""
+"""Migration from the "legacy" JSON format, to an sqlite database, and node uuid based repository to hash based."""
from contextlib import contextmanager
from datetime import datetime
from hashlib import sha256
-import json
from pathlib import Path, PurePosixPath
import shutil
import tarfile
@@ -21,14 +20,14 @@
from sqlalchemy import insert, select
from sqlalchemy.exc import IntegrityError
+from aiida.common.exceptions import CorruptStorage, StorageMigrationError
from aiida.common.hashing import chunked_file_hash
from aiida.common.progress_reporter import get_progress_reporter
from aiida.repository.common import File, FileType
-from aiida.tools.archive.common import MIGRATE_LOGGER, batch_iter
-from aiida.tools.archive.exceptions import CorruptArchive, MigrationValidationError
+from aiida.storage.log import MIGRATE_LOGGER
-from . import v1_db_schema as db
-from ..common import DB_FILENAME, META_FILENAME, REPO_FOLDER, create_sqla_engine
+from . import v1_db_schema as v1_schema
+from ..utils import DB_FILENAME, REPO_FOLDER, create_sqla_engine
from .utils import update_metadata
_NODE_ENTITY_NAME = 'Node'
@@ -57,18 +56,26 @@
}
aiida_orm_to_backend = {
- _USER_ENTITY_NAME: db.DbUser,
- _GROUP_ENTITY_NAME: db.DbGroup,
- _NODE_ENTITY_NAME: db.DbNode,
- _COMMENT_ENTITY_NAME: db.DbComment,
- _COMPUTER_ENTITY_NAME: db.DbComputer,
- _LOG_ENTITY_NAME: db.DbLog,
+ _USER_ENTITY_NAME: v1_schema.DbUser,
+ _GROUP_ENTITY_NAME: v1_schema.DbGroup,
+ _NODE_ENTITY_NAME: v1_schema.DbNode,
+ _COMMENT_ENTITY_NAME: v1_schema.DbComment,
+ _COMPUTER_ENTITY_NAME: v1_schema.DbComputer,
+ _LOG_ENTITY_NAME: v1_schema.DbLog,
}
+LEGACY_TO_MAIN_REVISION = 'main_0000'
+
def perform_v1_migration( # pylint: disable=too-many-locals
- inpath: Path, working: Path, archive_name: str, is_tar: bool, metadata: dict, data: dict, compression: int
-) -> str:
+ inpath: Path,
+ working: Path,
+ new_zip: ZipPath,
+ central_dir: Dict[str, Any],
+ is_tar: bool,
+ metadata: dict,
+ data: dict,
+) -> Path:
"""Perform the repository and JSON to SQLite migration.
1. Iterate though the repository paths in the archive
@@ -78,10 +85,11 @@ def perform_v1_migration( # pylint: disable=too-many-locals
:param inpath: the input path to the old archive
:param metadata: the metadata to migrate
:param data: the data to migrate
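+    :param working: a temporary working directory, in which the sqlite database file is created
+    :param new_zip: the new (zip) archive path, to which the repository files are written
+    :param central_dir: mapping of paths already written to the new zip, to their zip info
+    :param is_tar: whether the input archive is a tar file (rather than a zip file)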
+
+    :returns: the path to the SQLite database file
"""
MIGRATE_LOGGER.report('Initialising new archive...')
node_repos: Dict[str, List[Tuple[str, Optional[str]]]] = {}
- central_dir: Dict[str, Any] = {}
if is_tar:
# we cannot stream from a tar file performantly, so we extract it to disk first
@contextmanager
@@ -95,65 +103,56 @@ def in_archive_context(_inpath):
shutil.rmtree(temp_folder)
else:
in_archive_context = ZipPath # type: ignore
- with ZipPath(
- working / archive_name,
- mode='w',
- compresslevel=compression,
- name_to_info=central_dir,
- info_order=(META_FILENAME, DB_FILENAME)
- ) as new_path:
- with in_archive_context(inpath) as path:
- length = sum(1 for _ in path.glob('**/*'))
- base_parts = len(path.parts)
- with get_progress_reporter()(desc='Converting repo', total=length) as progress:
- for subpath in path.glob('**/*'):
- progress.update()
- parts = subpath.parts[base_parts:]
- # repository file are stored in the legacy archive as `nodes/uuid[0:2]/uuid[2:4]/uuid[4:]/path/...`
- if len(parts) < 6 or parts[0] != 'nodes' or parts[4] not in ('raw_input', 'path'):
- continue
- uuid = ''.join(parts[1:4])
- posix_rel = PurePosixPath(*parts[5:])
- hashkey = None
- if subpath.is_file():
+
+ with in_archive_context(inpath) as path:
+ length = sum(1 for _ in path.glob('**/*'))
+ base_parts = len(path.parts)
+ with get_progress_reporter()(desc='Converting repo', total=length) as progress:
+ for subpath in path.glob('**/*'):
+ progress.update()
+ parts = subpath.parts[base_parts:]
+                # repository files are stored in the legacy archive as `nodes/uuid[0:2]/uuid[2:4]/uuid[4:]/path/...`
+ if len(parts) < 6 or parts[0] != 'nodes' or parts[4] not in ('raw_input', 'path'):
+ continue
+ uuid = ''.join(parts[1:4])
+ posix_rel = PurePosixPath(*parts[5:])
+ hashkey = None
+ if subpath.is_file():
+ with subpath.open('rb') as handle:
+ hashkey = chunked_file_hash(handle, sha256)
+ if f'{REPO_FOLDER}/{hashkey}' not in central_dir:
with subpath.open('rb') as handle:
- hashkey = chunked_file_hash(handle, sha256)
- if f'{REPO_FOLDER}/{hashkey}' not in central_dir:
- with subpath.open('rb') as handle:
- with (new_path / f'{REPO_FOLDER}/{hashkey}').open(mode='wb') as handle2:
- shutil.copyfileobj(handle, handle2)
- node_repos.setdefault(uuid, []).append((posix_rel.as_posix(), hashkey))
- MIGRATE_LOGGER.report(f'Unique files written: {len(central_dir)}')
-
- _json_to_sqlite(working / DB_FILENAME, data, node_repos)
-
- MIGRATE_LOGGER.report('Finalising archive')
- with (working / DB_FILENAME).open('rb') as handle:
- with (new_path / DB_FILENAME).open(mode='wb') as handle2:
- shutil.copyfileobj(handle, handle2)
-
- # remove legacy keys from metadata and store
- metadata.pop('unique_identifiers', None)
- metadata.pop('all_fields_info', None)
- # remove legacy key nesting
- metadata['creation_parameters'] = metadata.pop('export_parameters', {})
- metadata['compression'] = compression
- metadata['key_format'] = 'sha256'
- metadata['mtime'] = datetime.now().isoformat()
- update_metadata(metadata, '1.0')
- (new_path / META_FILENAME).write_text(json.dumps(metadata))
-
- return '1.0'
-
-
-def _json_to_sqlite(
+ with (new_zip / f'{REPO_FOLDER}/{hashkey}').open(mode='wb') as handle2:
+ shutil.copyfileobj(handle, handle2)
+ node_repos.setdefault(uuid, []).append((posix_rel.as_posix(), hashkey))
+ MIGRATE_LOGGER.report(f'Unique repository files written: {len(central_dir)}')
+
+ # convert the JSON database to SQLite
+ _json_to_sqlite(working / DB_FILENAME, data, node_repos)
+
+ # remove legacy keys from metadata and store
+ metadata.pop('unique_identifiers', None)
+ metadata.pop('all_fields_info', None)
+ # remove legacy key nesting
+ metadata['creation_parameters'] = metadata.pop('export_parameters', {})
+ metadata['key_format'] = 'sha256'
+
+ # update the version in the metadata
+ update_metadata(metadata, LEGACY_TO_MAIN_REVISION)
+
+ return working / DB_FILENAME
+
+
+def _json_to_sqlite( # pylint: disable=too-many-branches,too-many-locals
outpath: Path, data: dict, node_repos: Dict[str, List[Tuple[str, Optional[str]]]], batch_size: int = 100
) -> None:
"""Convert a JSON archive format to SQLite."""
+ from aiida.tools.archive.common import batch_iter
+
MIGRATE_LOGGER.report('Converting DB to SQLite')
engine = create_sqla_engine(outpath)
- db.ArchiveV1Base.metadata.create_all(engine)
+ v1_schema.ArchiveV1Base.metadata.create_all(engine)
with engine.begin() as connection:
# proceed in order of relationships
@@ -168,10 +167,11 @@ def _json_to_sqlite(
with get_progress_reporter()(desc=f'Adding {entity_type}s', total=length) as progress:
for nrows, rows in batch_iter(_iter_entity_fields(data, entity_type, node_repos), batch_size):
# to-do check for unused keys?
+ # to-do handle null values?
try:
connection.execute(insert(backend_cls.__table__), rows) # type: ignore
except IntegrityError as exc:
- raise MigrationValidationError(f'Database integrity error: {exc}') from exc
+ raise StorageMigrationError(f'Database integrity error: {exc}') from exc
progress.update(nrows)
if not (data['groups_uuid'] or data['links_uuid']):
@@ -180,43 +180,59 @@ def _json_to_sqlite(
with engine.begin() as connection:
# get mapping of node IDs to node UUIDs
- node_uuid_map = {uuid: pk for uuid, pk in connection.execute(select(db.DbNode.uuid, db.DbNode.id))} # pylint: disable=unnecessary-comprehension
+ node_uuid_map = {
+ uuid: pk for uuid, pk in connection.execute(select(v1_schema.DbNode.uuid, v1_schema.DbNode.id)) # pylint: disable=unnecessary-comprehension
+ }
# links
if data['links_uuid']:
def _transform_link(link_row):
+ try:
+ input_id = node_uuid_map[link_row['input']]
+ except KeyError:
+ raise StorageMigrationError(f'Database contains link with unknown input node: {link_row}')
+ try:
+ output_id = node_uuid_map[link_row['output']]
+ except KeyError:
+ raise StorageMigrationError(f'Database contains link with unknown output node: {link_row}')
return {
- 'input_id': node_uuid_map[link_row['input']],
- 'output_id': node_uuid_map[link_row['output']],
+ 'input_id': input_id,
+ 'output_id': output_id,
'label': link_row['label'],
'type': link_row['type']
}
with get_progress_reporter()(desc='Adding Links', total=len(data['links_uuid'])) as progress:
for nrows, rows in batch_iter(data['links_uuid'], batch_size, transform=_transform_link):
- connection.execute(insert(db.DbLink.__table__), rows)
+ connection.execute(insert(v1_schema.DbLink.__table__), rows)
progress.update(nrows)
# groups to nodes
if data['groups_uuid']:
# get mapping of node IDs to node UUIDs
- group_uuid_map = {uuid: pk for uuid, pk in connection.execute(select(db.DbGroup.uuid, db.DbGroup.id))} # pylint: disable=unnecessary-comprehension
+ group_uuid_map = {
+ uuid: pk for uuid, pk in connection.execute(select(v1_schema.DbGroup.uuid, v1_schema.DbGroup.id)) # pylint: disable=unnecessary-comprehension
+ }
length = sum(len(uuids) for uuids in data['groups_uuid'].values())
+ unknown_nodes: Dict[str, set] = {}
with get_progress_reporter()(desc='Adding Group-Nodes', total=length) as progress:
for group_uuid, node_uuids in data['groups_uuid'].items():
group_id = group_uuid_map[group_uuid]
- connection.execute(
- insert(db.DbGroupNodes.__table__), [{
- 'dbnode_id': node_uuid_map[uuid],
- 'dbgroup_id': group_id
- } for uuid in node_uuids]
- )
+ rows = []
+ for uuid in node_uuids:
+ if uuid in node_uuid_map:
+ rows.append({'dbnode_id': node_uuid_map[uuid], 'dbgroup_id': group_id})
+ else:
+ unknown_nodes.setdefault(group_uuid, set()).add(uuid)
+ connection.execute(insert(v1_schema.DbGroupNodes.__table__), rows)
progress.update(len(node_uuids))
+ if unknown_nodes:
+ MIGRATE_LOGGER.warning(f'Dropped unknown nodes in groups: {unknown_nodes}')
def _convert_datetime(key, value):
- if key in ('time', 'ctime', 'mtime'):
+ if key in ('time', 'ctime', 'mtime') and value is not None:
return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%f')
return value
@@ -234,9 +250,9 @@ def _iter_entity_fields(
extras = data.get('node_extras', {})
for pk, all_fields in data['export_data'].get(name, {}).items():
if pk not in attributes:
- raise CorruptArchive(f'Unable to find attributes info for Node with Pk={pk}')
+ raise CorruptStorage(f'Unable to find attributes info for Node with Pk={pk}')
if pk not in extras:
- raise CorruptArchive(f'Unable to find extra info for Node with Pk={pk}')
+ raise CorruptStorage(f'Unable to find extra info for Node with Pk={pk}')
uuid = all_fields['uuid']
repository_metadata = _create_repo_metadata(node_repos[uuid]) if uuid in node_repos else {}
yield {
diff --git a/aiida/storage/sqlite_zip/migrations/script.py.mako b/aiida/storage/sqlite_zip/migrations/script.py.mako
new file mode 100644
index 0000000000..b0e41c2687
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade():
+ """Migrations for the upgrade."""
+ ${upgrades if upgrades else "pass"}
+
+
+def downgrade():
+ """Migrations for the downgrade."""
+ ${downgrades if downgrades else "pass"}
diff --git a/aiida/tools/archive/implementations/sqlite/common.py b/aiida/storage/sqlite_zip/migrations/utils.py
similarity index 79%
rename from aiida/tools/archive/implementations/sqlite/common.py
rename to aiida/storage/sqlite_zip/migrations/utils.py
index a375cf7c26..dfd72ec6ca 100644
--- a/aiida/tools/archive/implementations/sqlite/common.py
+++ b/aiida/storage/sqlite_zip/migrations/utils.py
@@ -12,41 +12,55 @@
from pathlib import Path
import shutil
import tempfile
-from typing import Callable, Sequence, Union
+from typing import Callable, Sequence
from archive_path import TarPath, ZipPath
-from sqlalchemy import event
-from sqlalchemy.future.engine import Engine, create_engine
-from aiida.common import json
+from aiida.common import exceptions
from aiida.common.progress_reporter import create_callback, get_progress_reporter
-META_FILENAME = 'metadata.json'
-DB_FILENAME = 'db.sqlite3'
-# folder to store repository files in
-REPO_FOLDER = 'repo'
-
-
-def sqlite_enforce_foreign_keys(dbapi_connection, _):
- """Enforce foreign key constraints, when using sqlite backend (off by default)"""
- cursor = dbapi_connection.cursor()
- cursor.execute('PRAGMA foreign_keys=ON;')
- cursor.close()
-
-
-def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine:
- """Create a new engine instance."""
- engine = create_engine(
- f'sqlite:///{path}',
- json_serializer=json.dumps,
- json_deserializer=json.loads,
- encoding='utf-8',
- future=True,
- **kwargs
- )
- if enforce_foreign_keys:
- event.listen(engine, 'connect', sqlite_enforce_foreign_keys)
- return engine
+
+def update_metadata(metadata, version):
+ """Update the metadata with a new version number and a notification of the conversion that was executed.
+
+ :param metadata: the content of an export archive metadata.json file
+ :param version: string version number that the updated metadata should get
+ """
+ from aiida import get_version
+
+ old_version = metadata['export_version']
+ conversion_info = metadata.get('conversion_info', [])
+
+ conversion_message = f'Converted from version {old_version} to {version} with AiiDA v{get_version()}'
+ conversion_info.append(conversion_message)
+
+ metadata['aiida_version'] = get_version()
+ metadata['export_version'] = version
+ metadata['conversion_info'] = conversion_info
+
+
+def verify_metadata_version(metadata, version=None):
+ """Utility function to verify that the metadata has the correct version number.
+
+ If no version number is passed, it will just extract the version number and return it.
+
+ :param metadata: the content of an export archive metadata.json file
+ :param version: string version number that the metadata is expected to have
+ """
+ try:
+ metadata_version = metadata['export_version']
+ except KeyError:
+ raise exceptions.StorageMigrationError("metadata is missing the 'export_version' key")
+
+ if version is None:
+ return metadata_version
+
+ if metadata_version != version:
+ raise exceptions.StorageMigrationError(
+ f'expected archive file with version {version} but found version {metadata_version}'
+ )
+
+ return None
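+
+
+# Example (illustrative sketch): a legacy migration step might combine these helpers, assuming
+# ``metadata`` was loaded from the archive's ``metadata.json``; the version strings are hypothetical.
+#
+#     verify_metadata_version(metadata, version='0.9')  # raise if not at the expected version
+#     update_metadata(metadata, '0.10')  # bump the version and record the conversion message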
def copy_zip_to_zip(
diff --git a/aiida/storage/sqlite_zip/migrations/v1_db_schema.py b/aiida/storage/sqlite_zip/migrations/v1_db_schema.py
new file mode 100644
index 0000000000..bad4f14ac0
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/v1_db_schema.py
@@ -0,0 +1,216 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""This is the sqlite DB schema, coresponding to the `main_0000` revision of the `sqlite_zip` backend,
+see: `versions/main_0000_initial.py`
+
+For normal operation of the archive,
+we auto-generate the schema from the models in ``aiida.storage.psql_dos.models``.
+However, when migrating an archive from the old format, we require a fixed revision of the schema.
+
+The only differences between the PostgreSQL schema and the SQLite one
+are the replacement of ``JSONB`` with ``JSON``, and ``UUID`` with ``CHAR(32)``.
+"""
+from sqlalchemy import ForeignKey, MetaData, orm
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.schema import Column, UniqueConstraint
+from sqlalchemy.types import CHAR, Boolean, DateTime, Integer, String, Text
+
+from aiida.common import timezone
+from aiida.common.utils import get_new_uuid
+
+# see https://alembic.sqlalchemy.org/en/latest/naming.html
+naming_convention = (
+ ('pk', '%(table_name)s_pkey'),
+ ('ix', 'ix_%(table_name)s_%(column_0_N_label)s'),
+ ('uq', 'uq_%(table_name)s_%(column_0_N_name)s'),
+ ('ck', 'ck_%(table_name)s_%(constraint_name)s'),
+ ('fk', 'fk_%(table_name)s_%(column_0_N_name)s_%(referred_table_name)s'),
+)
+
+ArchiveV1Base = orm.declarative_base(metadata=MetaData(naming_convention=dict(naming_convention)))
+
+
+class DbAuthInfo(ArchiveV1Base):
+ """Class that keeps the authentication data."""
+
+ __tablename__ = 'db_dbauthinfo'
+ __table_args__ = (UniqueConstraint('aiidauser_id', 'dbcomputer_id'),)
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ aiidauser_id = Column(
+ Integer,
+ ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'),
+ nullable=True,
+ index=True
+ )
+ dbcomputer_id = Column(
+ Integer,
+ ForeignKey('db_dbcomputer.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'),
+ nullable=True,
+ index=True
+ )
+ _metadata = Column('metadata', JSON, default=dict, nullable=True)
+ auth_params = Column(JSON, default=dict, nullable=True)
+ enabled = Column(Boolean, default=True, nullable=True)
+
+
+class DbComment(ArchiveV1Base):
+ """Class to store comments."""
+
+ __tablename__ = 'db_dbcomment'
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True)
+ dbnode_id = Column(
+ Integer,
+ ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'),
+ nullable=True,
+ index=True
+ )
+ ctime = Column(DateTime(timezone=True), default=timezone.now, nullable=True)
+ mtime = Column(DateTime(timezone=True), default=timezone.now, nullable=True)
+ user_id = Column(
+ Integer,
+ ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'),
+ nullable=True,
+ index=True
+ )
+ content = Column(Text, default='', nullable=True)
+
+
+class DbComputer(ArchiveV1Base):
+ """Class to store computers."""
+ __tablename__ = 'db_dbcomputer'
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True)
+ label = Column(String(255), unique=True, nullable=False)
+ hostname = Column(String(255), default='', nullable=True)
+ description = Column(Text, default='', nullable=True)
+ scheduler_type = Column(String(255), default='', nullable=True)
+ transport_type = Column(String(255), default='', nullable=True)
+ _metadata = Column('metadata', JSON, default=dict, nullable=True)
+
+
+class DbGroupNodes(ArchiveV1Base):
+ """Class to store join table for group -> nodes."""
+
+ __tablename__ = 'db_dbgroup_dbnodes'
+ __table_args__ = (UniqueConstraint('dbgroup_id', 'dbnode_id'),)
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ dbnode_id = Column(
+ Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'), nullable=False, index=True
+ )
+ dbgroup_id = Column(
+ Integer, ForeignKey('db_dbgroup.id', deferrable=True, initially='DEFERRED'), nullable=False, index=True
+ )
+
+
+class DbGroup(ArchiveV1Base):
+ """Class to store groups."""
+
+ __tablename__ = 'db_dbgroup'
+ __table_args__ = (UniqueConstraint('label', 'type_string'),)
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True)
+ label = Column(String(255), nullable=False, index=True)
+ type_string = Column(String(255), default='', nullable=True, index=True)
+ time = Column(DateTime(timezone=True), default=timezone.now, nullable=True)
+ description = Column(Text, default='', nullable=True)
+ extras = Column(JSON, default=dict, nullable=False)
+ user_id = Column(
+ Integer,
+ ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'),
+ nullable=False,
+ index=True
+ )
+
+
+class DbLog(ArchiveV1Base):
+ """Class to store logs."""
+
+ __tablename__ = 'db_dblog'
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True)
+ time = Column(DateTime(timezone=True), default=timezone.now, nullable=True)
+ loggername = Column(String(255), default='', nullable=True, index=True)
+ levelname = Column(String(50), default='', nullable=True, index=True)
+ dbnode_id = Column(
+ Integer,
+ ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED', ondelete='CASCADE'),
+ nullable=False,
+ index=True
+ )
+ message = Column(Text(), default='', nullable=True)
+ _metadata = Column('metadata', JSON, default=dict, nullable=True)
+
+
+class DbNode(ArchiveV1Base):
+ """Class to store nodes."""
+
+ __tablename__ = 'db_dbnode'
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ uuid = Column(CHAR(32), default=get_new_uuid, nullable=False, unique=True)
+ node_type = Column(String(255), default='', nullable=False, index=True)
+ process_type = Column(String(255), index=True)
+ label = Column(String(255), default='', index=True, nullable=True)
+ description = Column(Text(), default='', nullable=True)
+ ctime = Column(DateTime(timezone=True), default=timezone.now, nullable=True, index=True)
+ mtime = Column(DateTime(timezone=True), default=timezone.now, nullable=True, index=True)
+ attributes = Column(JSON)
+ extras = Column(JSON)
+ repository_metadata = Column(JSON, nullable=False, default=dict, server_default='{}')
+ dbcomputer_id = Column(
+ Integer,
+ ForeignKey('db_dbcomputer.id', deferrable=True, initially='DEFERRED', ondelete='RESTRICT'),
+ nullable=True,
+ index=True
+ )
+ user_id = Column(
+ Integer,
+ ForeignKey('db_dbuser.id', deferrable=True, initially='DEFERRED', ondelete='restrict'),
+ nullable=False,
+ index=True
+ )
+
+
+class DbLink(ArchiveV1Base):
+ """Class to store links between nodes."""
+
+ __tablename__ = 'db_dblink'
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ input_id = Column(
+ Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'), nullable=False, index=True
+ )
+ output_id = Column(
+ Integer,
+ ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'),
+ nullable=False,
+ index=True
+ )
+ label = Column(String(255), default='', nullable=False, index=True)
+ type = Column(String(255), nullable=False, index=True)
+
+
+class DbUser(ArchiveV1Base):
+ """Class to store users."""
+
+ __tablename__ = 'db_dbuser'
+
+ id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
+ email = Column(String(254), nullable=False, unique=True)
+ first_name = Column(String(254), default='', nullable=True)
+ last_name = Column(String(254), default='', nullable=True)
+ institution = Column(String(254), default='', nullable=True)
diff --git a/aiida/storage/sqlite_zip/migrations/versions/__init__.py b/aiida/storage/sqlite_zip/migrations/versions/__init__.py
new file mode 100644
index 0000000000..2776a55f97
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/versions/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0000_initial.py b/aiida/storage/sqlite_zip/migrations/versions/main_0000_initial.py
new file mode 100644
index 0000000000..d45772daaa
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/versions/main_0000_initial.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+# pylint: disable=invalid-name,no-member
+"""Initial main branch schema
+
+This schema is mainly equivalent to the `main_0001` schema of the `psql_dos` backend.
+The differences are:
+
+1. Data types: the replacement of ``JSONB`` with ``JSON``, and ``UUID`` with ``CHAR(32)``.
+2. Some more fields are nullable, to allow migrations from legacy to main.
+ The nullable fields are then filled with default values, and set to non-nullable, in subsequent migrations.
+
+Revision ID: main_0000
+Revises:
+Create Date: 2021-02-02
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects.sqlite import JSON
+
+revision = 'main_0000'
+down_revision = None
+branch_labels = ('main',)
+depends_on = None
+
+
+def upgrade():
+ """Migrations for the upgrade."""
+ op.create_table(
+ 'db_dbcomputer',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True),
+ sa.Column('label', sa.String(length=255), nullable=False, unique=True),
+ sa.Column('hostname', sa.String(length=255), nullable=False),
+ sa.Column('description', sa.Text(), nullable=False),
+ sa.Column('scheduler_type', sa.String(length=255), nullable=False),
+ sa.Column('transport_type', sa.String(length=255), nullable=False),
+ sa.Column('metadata', JSON(), nullable=False),
+ )
+ op.create_table(
+ 'db_dbuser',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('email', sa.String(length=254), nullable=False, unique=True),
+ sa.Column('first_name', sa.String(length=254), nullable=False),
+ sa.Column('last_name', sa.String(length=254), nullable=False),
+ sa.Column('institution', sa.String(length=254), nullable=False),
+ )
+ op.create_table(
+ 'db_dbauthinfo',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('aiidauser_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('dbcomputer_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('metadata', JSON(), nullable=False),
+ sa.Column('auth_params', JSON(), nullable=False),
+ sa.Column('enabled', sa.Boolean(), nullable=False),
+ sa.ForeignKeyConstraint(
+ ['aiidauser_id'],
+ ['db_dbuser.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ sa.ForeignKeyConstraint(
+ ['dbcomputer_id'],
+ ['db_dbcomputer.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ sa.UniqueConstraint('aiidauser_id', 'dbcomputer_id'),
+ )
+ op.create_table(
+ 'db_dbgroup',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True),
+ sa.Column('label', sa.String(length=255), nullable=False, index=True),
+ sa.Column('type_string', sa.String(length=255), nullable=False, index=True),
+ sa.Column('time', sa.DateTime(timezone=True), nullable=False),
+ sa.Column('description', sa.Text(), nullable=False),
+ sa.Column('extras', JSON(), nullable=False),
+ sa.Column('user_id', sa.Integer(), nullable=False, index=True),
+ sa.ForeignKeyConstraint(
+ ['user_id'],
+ ['db_dbuser.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ sa.UniqueConstraint('label', 'type_string'),
+ )
+
+ op.create_table(
+ 'db_dbnode',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True),
+ sa.Column('node_type', sa.String(length=255), nullable=False, index=True),
+ sa.Column('process_type', sa.String(length=255), nullable=True, index=True),
+ sa.Column('label', sa.String(length=255), nullable=False, index=True),
+ sa.Column('description', sa.Text(), nullable=False),
+ sa.Column('ctime', sa.DateTime(timezone=True), nullable=False, index=True),
+ sa.Column('mtime', sa.DateTime(timezone=True), nullable=False, index=True),
+ sa.Column('attributes', JSON(), nullable=True),
+ sa.Column('extras', JSON(), nullable=True),
+ sa.Column('repository_metadata', JSON(), nullable=False),
+ sa.Column('dbcomputer_id', sa.Integer(), nullable=True, index=True),
+ sa.Column('user_id', sa.Integer(), nullable=False, index=True),
+ sa.ForeignKeyConstraint(
+ ['dbcomputer_id'],
+ ['db_dbcomputer.id'],
+ ondelete='RESTRICT',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ sa.ForeignKeyConstraint(
+ ['user_id'],
+ ['db_dbuser.id'],
+ ondelete='restrict',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ )
+
+ op.create_table(
+ 'db_dbcomment',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True),
+ sa.Column('dbnode_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('ctime', sa.DateTime(timezone=True), nullable=False),
+ sa.Column('mtime', sa.DateTime(timezone=True), nullable=False),
+ sa.Column('user_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('content', sa.Text(), nullable=False),
+ sa.ForeignKeyConstraint(
+ ['dbnode_id'],
+ ['db_dbnode.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ sa.ForeignKeyConstraint(
+ ['user_id'],
+ ['db_dbuser.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ )
+
+ op.create_table(
+ 'db_dbgroup_dbnodes',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('dbnode_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('dbgroup_id', sa.Integer(), nullable=False, index=True),
+ sa.ForeignKeyConstraint(['dbgroup_id'], ['db_dbgroup.id'], initially='DEFERRED', deferrable=True),
+ sa.ForeignKeyConstraint(['dbnode_id'], ['db_dbnode.id'], initially='DEFERRED', deferrable=True),
+ sa.UniqueConstraint('dbgroup_id', 'dbnode_id'),
+ )
+ op.create_table(
+ 'db_dblink',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('input_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('output_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('label', sa.String(length=255), nullable=False, index=True),
+ sa.Column('type', sa.String(length=255), nullable=False, index=True),
+ sa.ForeignKeyConstraint(['input_id'], ['db_dbnode.id'], initially='DEFERRED', deferrable=True),
+ sa.ForeignKeyConstraint(
+ ['output_id'],
+ ['db_dbnode.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ )
+
+ op.create_table(
+ 'db_dblog',
+ sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
+ sa.Column('uuid', sa.CHAR(32), nullable=False, unique=True),
+ sa.Column('time', sa.DateTime(timezone=True), nullable=False),
+ sa.Column('loggername', sa.String(length=255), nullable=False, index=True),
+ sa.Column('levelname', sa.String(length=50), nullable=False, index=True),
+ sa.Column('dbnode_id', sa.Integer(), nullable=False, index=True),
+ sa.Column('message', sa.Text(), nullable=False),
+ sa.Column('metadata', JSON(), nullable=False),
+ sa.ForeignKeyConstraint(
+ ['dbnode_id'],
+ ['db_dbnode.id'],
+ ondelete='CASCADE',
+ initially='DEFERRED',
+ deferrable=True,
+ ),
+ )
+
+
+def downgrade():
+ """Migrations for the downgrade."""
+ raise NotImplementedError('Downgrade of main_0000.')
diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0000a_replace_nulls.py b/aiida/storage/sqlite_zip/migrations/versions/main_0000a_replace_nulls.py
new file mode 100644
index 0000000000..7d5fa87463
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/versions/main_0000a_replace_nulls.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+# pylint: disable=invalid-name,no-member
+"""Replace null values with defaults
+
+Revision ID: main_0000a
+Revises: main_0000
+Create Date: 2022-03-04
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+from aiida.common import timezone
+
+# revision identifiers, used by Alembic.
+revision = 'main_0000a'
+down_revision = 'main_0000'
+branch_labels = None
+depends_on = None
+
+
+def upgrade(): # pylint: disable=too-many-statements
+ """Convert null values to default values.
+
+ This migration is performed in preparation for the next migration,
+ which will make these fields non-nullable.
+ """
+ db_dbauthinfo = sa.sql.table(
+ 'db_dbauthinfo',
+ sa.sql.column('aiidauser_id', sa.Integer),
+ sa.sql.column('dbcomputer_id', sa.Integer),
+ sa.Column('enabled', sa.Boolean),
+ sa.Column('auth_params', sa.JSON),
+ sa.Column('metadata', sa.JSON()),
+ )
+
+ # remove rows with null values, which may have previously resulted from deletion of a user or computer
+ op.execute(db_dbauthinfo.delete().where(db_dbauthinfo.c.aiidauser_id.is_(None))) # type: ignore[arg-type]
+ op.execute(db_dbauthinfo.delete().where(db_dbauthinfo.c.dbcomputer_id.is_(None))) # type: ignore[arg-type]
+
+ op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.enabled.is_(None)).values(enabled=True))
+ op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.auth_params.is_(None)).values(auth_params={}))
+ op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.metadata.is_(None)).values(metadata={}))
+
+ db_dbcomment = sa.sql.table(
+ 'db_dbcomment',
+ sa.sql.column('dbnode_id', sa.Integer),
+ sa.sql.column('user_id', sa.Integer),
+ sa.Column('content', sa.Text),
+ sa.Column('ctime', sa.DateTime(timezone=True)),
+ sa.Column('mtime', sa.DateTime(timezone=True)),
+ sa.Column('uuid', sa.CHAR(32)),
+ )
+
+ # remove rows with null values, which may have previously resulted from deletion of a node or user
+ op.execute(db_dbcomment.delete().where(db_dbcomment.c.dbnode_id.is_(None))) # type: ignore[arg-type]
+ op.execute(db_dbcomment.delete().where(db_dbcomment.c.user_id.is_(None))) # type: ignore[arg-type]
+
+ op.execute(db_dbcomment.update().where(db_dbcomment.c.content.is_(None)).values(content=''))
+ op.execute(db_dbcomment.update().where(db_dbcomment.c.ctime.is_(None)).values(ctime=timezone.now()))
+ op.execute(db_dbcomment.update().where(db_dbcomment.c.mtime.is_(None)).values(mtime=timezone.now()))
+
+ db_dbcomputer = sa.sql.table(
+ 'db_dbcomputer',
+ sa.Column('description', sa.Text),
+ sa.Column('hostname', sa.String(255)),
+ sa.Column('metadata', sa.JSON()),
+ sa.Column('scheduler_type', sa.String(255)),
+ sa.Column('transport_type', sa.String(255)),
+ sa.Column('uuid', sa.CHAR(32)),
+ )
+
+ op.execute(db_dbcomputer.update().where(db_dbcomputer.c.description.is_(None)).values(description=''))
+ op.execute(db_dbcomputer.update().where(db_dbcomputer.c.hostname.is_(None)).values(hostname=''))
+ op.execute(db_dbcomputer.update().where(db_dbcomputer.c.metadata.is_(None)).values(metadata={}))
+ op.execute(db_dbcomputer.update().where(db_dbcomputer.c.scheduler_type.is_(None)).values(scheduler_type=''))
+ op.execute(db_dbcomputer.update().where(db_dbcomputer.c.transport_type.is_(None)).values(transport_type=''))
+
+ db_dbgroup = sa.sql.table(
+ 'db_dbgroup',
+ sa.Column('description', sa.Text),
+ sa.Column('label', sa.String(255)),
+ sa.Column('time', sa.DateTime(timezone=True)),
+ sa.Column('type_string', sa.String(255)),
+ sa.Column('uuid', sa.CHAR(32)),
+ )
+
+ op.execute(db_dbgroup.update().where(db_dbgroup.c.description.is_(None)).values(description=''))
+ op.execute(db_dbgroup.update().where(db_dbgroup.c.time.is_(None)).values(time=timezone.now()))
+ op.execute(db_dbgroup.update().where(db_dbgroup.c.type_string.is_(None)).values(type_string='core'))
+
+ db_dblog = sa.sql.table(
+ 'db_dblog',
+ sa.Column('levelname', sa.String(255)),
+ sa.Column('loggername', sa.String(255)),
+ sa.Column('message', sa.Text),
+ sa.Column('metadata', sa.JSON()),
+ sa.Column('time', sa.DateTime(timezone=True)),
+ sa.Column('uuid', sa.CHAR(32)),
+ )
+
+ op.execute(db_dblog.update().where(db_dblog.c.levelname.is_(None)).values(levelname=''))
+ op.execute(db_dblog.update().where(db_dblog.c.loggername.is_(None)).values(loggername=''))
+ op.execute(db_dblog.update().where(db_dblog.c.message.is_(None)).values(message=''))
+ op.execute(db_dblog.update().where(db_dblog.c.metadata.is_(None)).values(metadata={}))
+ op.execute(db_dblog.update().where(db_dblog.c.time.is_(None)).values(time=timezone.now()))
+
+ db_dbnode = sa.sql.table(
+ 'db_dbnode',
+ sa.Column('ctime', sa.DateTime(timezone=True)),
+ sa.Column('description', sa.Text),
+ sa.Column('label', sa.String(255)),
+ sa.Column('mtime', sa.DateTime(timezone=True)),
+ sa.Column('node_type', sa.String(255)),
+ sa.Column('uuid', sa.CHAR(32)),
+ )
+
+ op.execute(db_dbnode.update().where(db_dbnode.c.ctime.is_(None)).values(ctime=timezone.now()))
+ op.execute(db_dbnode.update().where(db_dbnode.c.description.is_(None)).values(description=''))
+ op.execute(db_dbnode.update().where(db_dbnode.c.label.is_(None)).values(label=''))
+ op.execute(db_dbnode.update().where(db_dbnode.c.mtime.is_(None)).values(mtime=timezone.now()))
+
+ db_dbuser = sa.sql.table(
+ 'db_dbuser',
+ sa.Column('email', sa.String(254)),
+ sa.Column('first_name', sa.String(254)),
+ sa.Column('last_name', sa.String(254)),
+ sa.Column('institution', sa.String(254)),
+ )
+
+ op.execute(db_dbuser.update().where(db_dbuser.c.first_name.is_(None)).values(first_name=''))
+ op.execute(db_dbuser.update().where(db_dbuser.c.last_name.is_(None)).values(last_name=''))
+ op.execute(db_dbuser.update().where(db_dbuser.c.institution.is_(None)).values(institution=''))
+
+
+def downgrade():
+ """Downgrade database schema."""
+ raise NotImplementedError('Downgrade of main_0000a.')
diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0000b_non_nullable.py b/aiida/storage/sqlite_zip/migrations/versions/main_0000b_non_nullable.py
new file mode 100644
index 0000000000..69d0119c8e
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/versions/main_0000b_non_nullable.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+# pylint: disable=invalid-name,no-member
+"""Alter columns to be non-nullable (to bring inline with psql_dos main_0001).
+
+Revision ID: main_0000b
+Revises: main_0000a
+Create Date: 2022-03-04
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = 'main_0000b'
+down_revision = 'main_0000a'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ """Upgrade database schema."""
+ # see https://alembic.sqlalchemy.org/en/latest/batch.html#running-batch-migrations-for-sqlite-and-other-databases
+ # for why we run these in batches
+ with op.batch_alter_table('db_dbauthinfo') as batch_op:
+ batch_op.alter_column('aiidauser_id', existing_type=sa.INTEGER(), nullable=False)
+ batch_op.alter_column('dbcomputer_id', existing_type=sa.INTEGER(), nullable=False)
+ batch_op.alter_column('metadata', existing_type=sa.JSON(), nullable=False)
+ batch_op.alter_column('auth_params', existing_type=sa.JSON(), nullable=False)
+ batch_op.alter_column('enabled', existing_type=sa.BOOLEAN(), nullable=False)
+
+ with op.batch_alter_table('db_dbcomment') as batch_op:
+ batch_op.alter_column('dbnode_id', existing_type=sa.INTEGER(), nullable=False)
+ batch_op.alter_column('user_id', existing_type=sa.INTEGER(), nullable=False)
+ batch_op.alter_column('content', existing_type=sa.TEXT(), nullable=False)
+ batch_op.alter_column('ctime', existing_type=sa.DateTime(timezone=True), nullable=False)
+ batch_op.alter_column('mtime', existing_type=sa.DateTime(timezone=True), nullable=False)
+
+ with op.batch_alter_table('db_dbcomputer') as batch_op:
+ batch_op.alter_column('description', existing_type=sa.TEXT(), nullable=False)
+ batch_op.alter_column('hostname', existing_type=sa.String(255), nullable=False)
+ batch_op.alter_column('metadata', existing_type=sa.JSON(), nullable=False)
+ batch_op.alter_column('scheduler_type', existing_type=sa.String(255), nullable=False)
+ batch_op.alter_column('transport_type', existing_type=sa.String(255), nullable=False)
+
+ with op.batch_alter_table('db_dbgroup') as batch_op:
+ batch_op.alter_column('description', existing_type=sa.TEXT(), nullable=False)
+ batch_op.alter_column('time', existing_type=sa.DateTime(timezone=True), nullable=False)
+ batch_op.alter_column('type_string', existing_type=sa.String(255), nullable=False)
+
+ with op.batch_alter_table('db_dblog') as batch_op:
+ batch_op.alter_column('levelname', existing_type=sa.String(50), nullable=False)
+ batch_op.alter_column('loggername', existing_type=sa.String(255), nullable=False)
+ batch_op.alter_column('message', existing_type=sa.TEXT(), nullable=False)
+ batch_op.alter_column('time', existing_type=sa.DateTime(timezone=True), nullable=False)
+ batch_op.alter_column('metadata', existing_type=sa.JSON(), nullable=False)
+
+ with op.batch_alter_table('db_dbnode') as batch_op:
+ batch_op.alter_column('ctime', existing_type=sa.DateTime(timezone=True), nullable=False)
+ batch_op.alter_column('description', existing_type=sa.TEXT(), nullable=False)
+ batch_op.alter_column('label', existing_type=sa.String(255), nullable=False)
+ batch_op.alter_column('mtime', existing_type=sa.DateTime(timezone=True), nullable=False)
+
+ with op.batch_alter_table('db_dbuser') as batch_op:
+ batch_op.alter_column('first_name', existing_type=sa.String(254), nullable=False)
+ batch_op.alter_column('last_name', existing_type=sa.String(254), nullable=False)
+ batch_op.alter_column('institution', existing_type=sa.String(254), nullable=False)
+
+
+def downgrade():
+ """Downgrade database schema."""
+ raise NotImplementedError('Downgrade of main_0000b.')
diff --git a/aiida/storage/sqlite_zip/migrations/versions/main_0001.py b/aiida/storage/sqlite_zip/migrations/versions/main_0001.py
new file mode 100644
index 0000000000..706fc1c25e
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrations/versions/main_0001.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+# pylint: disable=invalid-name,no-member
+"""Bring schema inline with psql_dos main_0001
+
+Revision ID: main_0001
+Revises: main_0000b
+Create Date: 2021-02-02
+
+"""
+revision = 'main_0001'
+down_revision = 'main_0000b'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ """Migrations for the upgrade."""
+
+
+def downgrade():
+ """Migrations for the downgrade."""
+ raise NotImplementedError('Downgrade of main_0001.')
diff --git a/aiida/storage/sqlite_zip/migrator.py b/aiida/storage/sqlite_zip/migrator.py
new file mode 100644
index 0000000000..52cd81a91a
--- /dev/null
+++ b/aiida/storage/sqlite_zip/migrator.py
@@ -0,0 +1,375 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""Versioning and migration implementation for the sqlite_zip format."""
+import contextlib
+from datetime import datetime
+import os
+from pathlib import Path
+import shutil
+import tarfile
+import tempfile
+from typing import Any, Dict, Iterator, List, Optional, Union
+import zipfile
+
+from alembic.command import upgrade
+from alembic.config import Config
+from alembic.runtime.environment import EnvironmentContext
+from alembic.runtime.migration import MigrationContext, MigrationInfo
+from alembic.script import ScriptDirectory
+from archive_path import ZipPath, extract_file_in_zip, open_file_in_tar, open_file_in_zip
+
+from aiida.common import json
+from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, StorageMigrationError
+from aiida.common.progress_reporter import get_progress_reporter
+from aiida.storage.log import MIGRATE_LOGGER
+
+from .migrations.legacy import FINAL_LEGACY_VERSION, LEGACY_MIGRATE_FUNCTIONS
+from .migrations.legacy_to_main import LEGACY_TO_MAIN_REVISION, perform_v1_migration
+from .migrations.utils import copy_tar_to_zip, copy_zip_to_zip, update_metadata
+from .utils import DB_FILENAME, META_FILENAME, REPO_FOLDER, create_sqla_engine, extract_metadata, read_version
+
+
+def get_schema_version_head() -> str:
+ """Return the head schema version for this storage, i.e. the latest schema this storage can be migrated to."""
+ return _alembic_script().revision_map.get_current_head('main') or ''
+
+
+def list_versions() -> List[str]:
+ """Return all available schema versions (oldest to latest)."""
+ legacy_versions = list(LEGACY_MIGRATE_FUNCTIONS) + [FINAL_LEGACY_VERSION]
+ alembic_versions = [entry.revision for entry in reversed(list(_alembic_script().walk_revisions()))]
+ return legacy_versions + alembic_versions
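+
+# Example (illustrative): the returned list interleaves the legacy and alembic versions, e.g.
+# ['0.4', ..., '0.13', 'main_0000', 'main_0000a', 'main_0000b', 'main_0001']
+# (the exact legacy entries depend on ``LEGACY_MIGRATE_FUNCTIONS``).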
+
+
+def validate_storage(inpath: Path) -> None:
+ """Validate that the storage is at the head version.
+
+ :raises: :class:`aiida.common.exceptions.UnreachableStorage` if the file does not exist
+ :raises: :class:`aiida.common.exceptions.CorruptStorage`
+ if the version cannot be read from the storage.
+ :raises: :class:`aiida.common.exceptions.IncompatibleStorageSchema`
+ if the storage is not compatible with the code API.
+ """
+ schema_version_code = get_schema_version_head()
+ schema_version_archive = read_version(inpath)
+ if schema_version_archive != schema_version_code:
+ raise IncompatibleStorageSchema(
+ f'Archive schema version `{schema_version_archive}` '
+ f'is incompatible with the required schema version `{schema_version_code}`. '
+ 'To migrate the archive schema version to the current one, '
+ f'run the following command: verdi archive migrate {str(inpath)!r}'
+ )
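+
+# Example (illustrative): ``validate_storage(Path('export.aiida'))`` passes silently for an archive
+# already at ``get_schema_version_head()`` and raises ``IncompatibleStorageSchema`` otherwise;
+# the file name is hypothetical.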
+
+
+def migrate( # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+ inpath: Union[str, Path],
+ outpath: Union[str, Path],
+ version: str,
+ *,
+ force: bool = False,
+ compression: int = 6
+) -> None:
+ """Migrate an `sqlite_zip` storage file to a specific version.
+
+ Historically, this format could be a zip or a tar file,
+ containing the database in a bespoke JSON format and the repository files in the "legacy" per-node format.
+ For these versions, we first migrate the JSON database to the final legacy schema,
+ then we convert this file to the SQLite database, whilst sequentially migrating the repository files.
+
+ Once any legacy migrations have been performed, we can then migrate the SQLite database to the final schema,
+ using alembic.
+
+ Note that, to minimise disk space usage, we never fully extract/uncompress the input file
+ (except when migrating from a legacy tar file, from which we cannot extract individual files):
+
+ 1. The sqlite database is extracted to a temporary location and migrated
+ 2. A new zip file is opened, within a temporary folder
+ 3. The repository files are "streamed" directly between the input file and the new zip file
+ 4. The sqlite database and metadata JSON are written to the new zip file
+ 5. The new zip file is closed (which writes its final central directory)
+ 6. The new zip file is moved to the output location, removing any existing file if `force=True`
+
+ :param inpath: Path to the input file
+ :param outpath: Path to output the migrated file
+ :param version: Target version
+ :param force: If True, overwrite the output file if it exists
+ :param compression: Compression level for the output file
+ """
+ inpath = Path(inpath)
+ outpath = Path(outpath)
+
+ # halt immediately if we cannot write to the output file
+ if outpath.exists() and not force:
+ raise StorageMigrationError('Output path already exists and force=False')
+ if outpath.exists() and not outpath.is_file():
+ raise StorageMigrationError('Existing output path is not a file')
+
+ # the file should be either a tar (legacy only) or zip file
+ if tarfile.is_tarfile(str(inpath)):
+ is_tar = True
+ elif zipfile.is_zipfile(str(inpath)):
+ is_tar = False
+ else:
+ raise CorruptStorage(f'The input file is neither a tar nor a zip file: {inpath}')
+
+ # read the metadata.json which should always be present
+ metadata = extract_metadata(inpath, search_limit=None)
+
+ # obtain the current version from the metadata
+ if 'export_version' not in metadata:
+ raise CorruptStorage('No export_version found in metadata')
+ current_version = metadata['export_version']
+ # update the modified time of the file and the compression
+ metadata['mtime'] = datetime.now().isoformat()
+ metadata['compression'] = compression
+
+ # check versions are valid
+ # versions 0.1, 0.2, 0.3 are no longer supported,
+ # since 0.3 -> 0.4 requires costly migrations of repo files (you would need to unpack all of them)
+ if current_version in ('0.1', '0.2', '0.3') or version in ('0.1', '0.2', '0.3'):
+ raise StorageMigrationError(
+ f"Legacy migration from '{current_version}' -> '{version}' is not supported in aiida-core v2. "
+ 'First migrate them to the latest version in aiida-core v1.'
+ )
+ all_versions = list_versions()
+ if current_version not in all_versions:
+ raise StorageMigrationError(f"Unknown current version '{current_version}'")
+ if version not in all_versions:
+ raise StorageMigrationError(f"Unknown target version '{version}'")
+
+ # if we are already at the desired version, then no migration is required, so simply copy the file if necessary
+ if current_version == version:
+ if inpath != outpath:
+ if outpath.exists() and force:
+ outpath.unlink()
+ shutil.copyfile(inpath, outpath)
+ return
+
+ # if the archive is a "legacy" format, i.e. has a data.json file, migrate it to the target/final legacy schema
+ data: Optional[Dict[str, Any]] = None
+ if current_version in LEGACY_MIGRATE_FUNCTIONS:
+ MIGRATE_LOGGER.report(f'Legacy migrations required from {"tar" if is_tar else "zip"} format')
+ MIGRATE_LOGGER.report('Extracting data.json ...')
+ # read the data.json file
+ data = _read_json(inpath, 'data.json', is_tar)
+ to_version = FINAL_LEGACY_VERSION if version not in LEGACY_MIGRATE_FUNCTIONS else version
+ current_version = _perform_legacy_migrations(current_version, to_version, metadata, data)
+
+ # if we are now at the target version, then write the updated files to a new zip file and exit
+ if current_version == version:
+ # create new legacy archive with updated metadata & data
+ def path_callback(inpath, outpath) -> bool:
+ if inpath.name == 'metadata.json':
+ outpath.write_text(json.dumps(metadata))
+ return True
+ if inpath.name == 'data.json':
+ outpath.write_text(json.dumps(data))
+ return True
+ return False
+
+ func = copy_tar_to_zip if is_tar else copy_zip_to_zip
+
+ func(
+ inpath,
+ outpath,
+ path_callback,
+ overwrite=force,
+ compression=compression,
+ title='Writing migrated legacy archive',
+ info_order=('metadata.json', 'data.json')
+ )
+ return
+
+ # create a temporary directory, in which to perform further migrations
+ with tempfile.TemporaryDirectory() as tmpdirname:
+
+ # open the new zip file, within which to write the migrated content
+ new_zip_path = Path(tmpdirname) / 'new.zip'
+ central_dir: Dict[str, Any] = {}
+ with ZipPath(
+ new_zip_path,
+ mode='w',
+ compresslevel=compression,
+ name_to_info=central_dir,
+ # this ensures that the metadata and database files are written above the repository files,
+ # in the central directory, so that they can be accessed easily
+ info_order=(META_FILENAME, DB_FILENAME)
+ ) as new_zip:
+
+ written_repo = False
+ if current_version == FINAL_LEGACY_VERSION:
+ # migrate from the legacy format,
+ # streaming the repository files directly to the new zip file
+ MIGRATE_LOGGER.report(
+ f'legacy {FINAL_LEGACY_VERSION!r} -> {LEGACY_TO_MAIN_REVISION!r} conversion required'
+ )
+ if data is None:
+ MIGRATE_LOGGER.report('Extracting data.json ...')
+ data = _read_json(inpath, 'data.json', is_tar)
+ db_path = perform_v1_migration(inpath, Path(tmpdirname), new_zip, central_dir, is_tar, metadata, data)
+ # the migration includes adding the repository files to the new zip file
+ written_repo = True
+ current_version = LEGACY_TO_MAIN_REVISION
+ else:
+ if is_tar:
+ raise CorruptStorage('Tar files are not supported for this format')
+ # extract the sqlite database, for alembic migrations
+ db_path = Path(tmpdirname) / DB_FILENAME
+ with db_path.open('wb') as handle:
+ try:
+ extract_file_in_zip(inpath, DB_FILENAME, handle)
+ except Exception as exc:
+ raise CorruptStorage(f'database could not be read: {exc}') from exc
+
+ # perform alembic migrations
+ # note, we do this before writing the repository files (unless a legacy migration),
+ # so that we don't waste time copying them (which could be slow), only for the alembic migration to fail
+ if current_version != version:
+ MIGRATE_LOGGER.report('Performing SQLite migrations:')
+ with _migration_context(db_path) as context:
+ assert context.script is not None
+ context.stamp(context.script, current_version)
+ context.connection.commit() # type: ignore
+ # see https://alembic.sqlalchemy.org/en/latest/batch.html#dealing-with-referencing-foreign-keys
+ # for why we do not enforce foreign keys here
+ with _alembic_connect(db_path, enforce_foreign_keys=False) as config:
+ upgrade(config, version)
+ update_metadata(metadata, version)
+
+ if not written_repo:
+ # stream the repository files directly to the new zip file
+ with ZipPath(inpath, mode='r') as old_zip:
+ length = sum(1 for _ in old_zip.glob('**/*', include_virtual=False))
+ title = 'Copying repository files'
+ with get_progress_reporter()(desc=title, total=length) as progress:
+ for subpath in old_zip.glob('**/*', include_virtual=False):
+ new_path_sub = new_zip.joinpath(subpath.at)
+ if subpath.parts[0] == REPO_FOLDER:
+ if subpath.is_dir():
+ new_path_sub.mkdir(exist_ok=True)
+ else:
+ new_path_sub.putfile(subpath)
+ progress.update()
+
+ MIGRATE_LOGGER.report('Finalising the migration ...')
+
+ # write the final database file to the new zip file
+ with db_path.open('rb') as handle:
+ with (new_zip / DB_FILENAME).open(mode='wb') as handle2:
+ shutil.copyfileobj(handle, handle2)
+
+ # write the final metadata.json file to the new zip file
+ (new_zip / META_FILENAME).write_text(json.dumps(metadata))
+
+ # on exiting the ZipPath context, the zip file is closed and the central directory written
+
+ # move the new zip file to the final location
+ if outpath.exists() and force:
+ outpath.unlink()
+ shutil.move(new_zip_path, outpath) # type: ignore[arg-type]
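+
+# Example (illustrative sketch): migrating an archive file to the latest schema version in a
+# single call; the file names are hypothetical.
+#
+#     from aiida.storage.sqlite_zip.migrator import get_schema_version_head, migrate
+#
+#     migrate('export_legacy.aiida', 'export_migrated.aiida', get_schema_version_head(), force=True)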
+
+
+def _read_json(inpath: Path, filename: str, is_tar: bool) -> Dict[str, Any]:
+ """Read a JSON file from the archive."""
+ if is_tar:
+ with open_file_in_tar(inpath, filename) as handle:
+ data = json.load(handle)
+ else:
+ with open_file_in_zip(inpath, filename) as handle:
+ data = json.load(handle)
+ return data
+
+
+def _perform_legacy_migrations(current_version: str, to_version: str, metadata: dict, data: dict) -> str:
+ """Perform legacy migrations from the current version to the desired version.
+
+ Legacy archives use the old ``data.json`` format for storing the database.
+ These migrations simply manipulate the metadata and data in-place.
+
+ :param current_version: current version of the archive
+ :param to_version: version to migrate to
+ :param metadata: the metadata to migrate
+ :param data: the data to migrate
+ :return: the new version of the archive
+ """
+ # compute the migration pathway
+ prev_version = current_version
+ pathway: List[str] = []
+ while prev_version != to_version:
+ if prev_version not in LEGACY_MIGRATE_FUNCTIONS:
+ raise StorageMigrationError(f"No migration pathway available for '{current_version}' to '{to_version}'")
+ if prev_version in pathway:
+ raise StorageMigrationError(
+ f'cyclic migration pathway encountered: {" -> ".join(pathway + [prev_version])}'
+ )
+ pathway.append(prev_version)
+ prev_version = LEGACY_MIGRATE_FUNCTIONS[prev_version][0]
+
+ if not pathway:
+ MIGRATE_LOGGER.report('No migration required')
+ return to_version
+
+ MIGRATE_LOGGER.report('Legacy migration pathway: %s', ' -> '.join(pathway + [to_version]))
+
+ with get_progress_reporter()(total=len(pathway), desc='Performing migrations: ') as progress:
+ for from_version in pathway:
+ to_version = LEGACY_MIGRATE_FUNCTIONS[from_version][0]
+ progress.set_description_str(f'Performing migrations: {from_version} -> {to_version}', refresh=True)
+ LEGACY_MIGRATE_FUNCTIONS[from_version][1](metadata, data)
+ progress.update()
+
+ return to_version
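+
+# Example (illustrative): migrating a version '0.10' archive to the final legacy version would
+# follow a pathway such as '0.10' -> '0.11' -> '0.12' -> '0.13', applying each legacy migration
+# function in turn (the exact steps depend on ``LEGACY_MIGRATE_FUNCTIONS``).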
+
+
+def _alembic_config() -> Config:
+ """Return an instance of an Alembic `Config`."""
+ config = Config()
+ config.set_main_option('script_location', str(Path(os.path.realpath(__file__)).parent / 'migrations'))
+ return config
+
+
+def _alembic_script() -> ScriptDirectory:
+ """Return an instance of an Alembic `ScriptDirectory`."""
+ return ScriptDirectory.from_config(_alembic_config())
+
+
+@contextlib.contextmanager
+def _alembic_connect(db_path: Path, enforce_foreign_keys=True) -> Iterator[Config]:
+ """Context manager to return an instance of an Alembic configuration.
+
+ The database connection is added to the config's `attributes` property, through which it can then be
+ retrieved, in particular in the `env.py` file, which is run when the database is migrated.
+ """
+ with create_sqla_engine(db_path, enforce_foreign_keys=enforce_foreign_keys).connect() as connection:
+ config = _alembic_config()
+ config.attributes['connection'] = connection # pylint: disable=unsupported-assignment-operation
+
+ def _callback(step: MigrationInfo, **kwargs): # pylint: disable=unused-argument
+ """Callback to be called after a migration step is executed."""
+ from_rev = step.down_revision_ids[0] if step.down_revision_ids else ''
+ MIGRATE_LOGGER.report(f'- {from_rev} -> {step.up_revision_id}')
+
+ config.attributes['on_version_apply'] = _callback # pylint: disable=unsupported-assignment-operation
+
+ yield config
+
+
+@contextlib.contextmanager
+def _migration_context(db_path: Path) -> Iterator[MigrationContext]:
+ """Context manager to return an instance of an Alembic migration context.
+
+ This migration context will have been configured with the current database connection, which allows this context
+ to be used to inspect the contents of the database, such as the current revision.
+ """
+ with _alembic_connect(db_path) as config:
+ script = ScriptDirectory.from_config(config)
+ with EnvironmentContext(config, script) as context:
+ context.configure(context.config.attributes['connection'])
+ yield context.get_context()
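+
+
+# Example (illustrative sketch): reading the revision currently stamped in an extracted database;
+# the path is hypothetical.
+#
+#     with _migration_context(Path('/tmp/db.sqlite3')) as context:
+#         current_revision = context.get_current_revision()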
diff --git a/aiida/storage/sqlite_zip/models.py b/aiida/storage/sqlite_zip/models.py
new file mode 100644
index 0000000000..7e637e4bb1
--- /dev/null
+++ b/aiida/storage/sqlite_zip/models.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""This module contains the SQLAlchemy models for the SQLite backend.
+
+These models are intended to be identical to those of the `psql_dos` backend,
+except for changes to the database specific types:
+
+- UUID -> CHAR(32)
+- DateTime -> TZDateTime
+- JSONB -> JSON
+
+Also, `varchar_pattern_ops` indexes are not possible in sqlite.
+"""
+from datetime import datetime
+import functools
+from typing import Any, Optional, Set, Tuple
+
+import pytz
+import sqlalchemy as sa
+from sqlalchemy import orm as sa_orm
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.dialects.sqlite import JSON
+
+from aiida.orm.entities import EntityTypes
+# we need to import all models, to ensure they are loaded on the SQLA Metadata
+from aiida.storage.psql_dos.models import authinfo, base, comment, computer, group, log, node, user
+
+
+class SqliteModel:
+ """Represent a row in an sqlite database table"""
+
+ def __repr__(self) -> str:
+ """Return a representation of the row columns"""
+ string = f'<{self.__class__.__name__}'
+ for col in self.__table__.columns: # type: ignore[attr-defined] # pylint: disable=no-member
+ # don't include columns with potentially large values
+ if isinstance(col.type, (JSON, sa.Text)):
+ continue
+ string += f' {col.name}={getattr(self, col.name)}'
+ return string + '>'
+
+
+class TZDateTime(sa.TypeDecorator): # pylint: disable=abstract-method
+ """A timezone naive UTC ``DateTime`` implementation for SQLite.
+
+ see: https://docs.sqlalchemy.org/en/14/core/custom_types.html#store-timezone-aware-timestamps-as-timezone-naive-utc
+ """
+ impl = sa.DateTime
+ cache_ok = True
+
+ def process_bind_param(self, value: Optional[datetime], dialect):
+ """Process before writing to database."""
+ if value is None:
+ return value
+ if value.tzinfo is None:
+ value = value.astimezone(pytz.utc)
+ value = value.astimezone(pytz.utc).replace(tzinfo=None)
+ return value
+
+ def process_result_value(self, value: Optional[datetime], dialect):
+ """Process when returning from database."""
+ if value is None:
+ return value
+ if value.tzinfo is None:
+ return value.replace(tzinfo=pytz.utc)
+ return value.astimezone(pytz.utc)
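+
+# Example (illustrative): binding a timezone-aware value stores it as naive UTC, e.g.
+#
+#     TZDateTime().process_bind_param(datetime(2022, 1, 1, 12, tzinfo=pytz.utc), dialect=None)
+#     # -> datetime(2022, 1, 1, 12, 0), i.e. naive and interpreted as UTC on read-back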
+
+
+SqliteBase = sa.orm.declarative_base(
+ cls=SqliteModel, name='SqliteModel', metadata=sa.MetaData(naming_convention=dict(base.naming_convention))
+)
+
+
+def pg_to_sqlite(pg_table: sa.Table):
+ """Convert a model intended for PostGreSQL to one compatible with SQLite"""
+ new = pg_table.to_metadata(SqliteBase.metadata)
+ for column in new.columns:
+ if isinstance(column.type, UUID):
+ column.type = sa.String(32)
+ elif isinstance(column.type, sa.DateTime):
+ column.type = TZDateTime()
+ elif isinstance(column.type, JSONB):
+ column.type = JSON()
+ # remove any postgresql specific indexes, e.g. varchar_pattern_ops
+ new.indexes.difference_update([idx for idx in new.indexes if idx.dialect_kwargs])
+ return new
+
+
+def create_orm_cls(klass: base.Base) -> SqliteBase:
+ """Create an ORM class from an existing table in the declarative meta"""
+ tbl = SqliteBase.metadata.tables[klass.__tablename__]
+ return type( # type: ignore[return-value]
+ klass.__name__,
+ (SqliteBase,),
+ {
+ '__tablename__': tbl.name,
+ '__table__': tbl,
+ **{col.name if col.name != 'metadata' else '_metadata': col for col in tbl.columns},
+ },
+ )
+
+
+for table in base.Base.metadata.sorted_tables:
+ pg_to_sqlite(table)
+
+DbUser = create_orm_cls(user.DbUser)
+DbComputer = create_orm_cls(computer.DbComputer)
+DbAuthInfo = create_orm_cls(authinfo.DbAuthInfo)
+DbGroup = create_orm_cls(group.DbGroup)
+DbNode = create_orm_cls(node.DbNode)
+DbGroupNodes = create_orm_cls(group.DbGroupNode)
+DbComment = create_orm_cls(comment.DbComment)
+DbLog = create_orm_cls(log.DbLog)
+DbLink = create_orm_cls(node.DbLink)
+
+# to-do ideally these relationships should be auto-generated in `create_orm_cls`, but this proved difficult
+DbAuthInfo.aiidauser = sa_orm.relationship( # type: ignore[attr-defined]
+ 'DbUser', backref=sa_orm.backref('authinfos', passive_deletes=True, cascade='all, delete')
+)
+DbAuthInfo.dbcomputer = sa_orm.relationship( # type: ignore[attr-defined]
+ 'DbComputer', backref=sa_orm.backref('authinfos', passive_deletes=True, cascade='all, delete')
+)
+DbComment.dbnode = sa_orm.relationship('DbNode', backref='dbcomments') # type: ignore[attr-defined]
+DbComment.user = sa_orm.relationship('DbUser') # type: ignore[attr-defined]
+DbGroup.user = sa_orm.relationship( # type: ignore[attr-defined]
+ 'DbUser', backref=sa_orm.backref('dbgroups', cascade='merge')
+)
+DbGroup.dbnodes = sa_orm.relationship( # type: ignore[attr-defined]
+ 'DbNode', secondary='db_dbgroup_dbnodes', backref='dbgroups', lazy='dynamic'
+)
+DbLog.dbnode = sa_orm.relationship( # type: ignore[attr-defined]
+ 'DbNode', backref=sa_orm.backref('dblogs', passive_deletes='all', cascade='merge')
+)
+DbNode.dbcomputer = sa_orm.relationship( # type: ignore[attr-defined]
+ 'DbComputer', backref=sa_orm.backref('dbnodes', passive_deletes='all', cascade='merge')
+)
+DbNode.user = sa_orm.relationship('DbUser', backref=sa_orm.backref( # type: ignore[attr-defined]
+ 'dbnodes',
+ passive_deletes='all',
+ cascade='merge',
+))
+
+
+@functools.lru_cache(maxsize=10)
+def get_model_from_entity(entity_type: EntityTypes) -> Tuple[Any, Set[str]]:
+ """Return the Sqlalchemy model and column names corresponding to the given entity."""
+ model = {
+ EntityTypes.USER: DbUser,
+ EntityTypes.AUTHINFO: DbAuthInfo,
+ EntityTypes.GROUP: DbGroup,
+ EntityTypes.NODE: DbNode,
+ EntityTypes.COMMENT: DbComment,
+ EntityTypes.COMPUTER: DbComputer,
+ EntityTypes.LOG: DbLog,
+ EntityTypes.LINK: DbLink,
+ EntityTypes.GROUP_NODE: DbGroupNodes
+ }[entity_type]
+ mapper = sa.inspect(model).mapper
+ column_names = {col.name for col in mapper.c.values()}
+ return model, column_names
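+
+
+# Example (illustrative sketch): resolving the node model and its column names, e.g. when building
+# bulk-insert statements during an archive import.
+#
+#     model, columns = get_model_from_entity(EntityTypes.NODE)
+#     assert 'uuid' in columns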
diff --git a/aiida/storage/sqlite_zip/utils.py b/aiida/storage/sqlite_zip/utils.py
new file mode 100644
index 0000000000..cd2838314e
--- /dev/null
+++ b/aiida/storage/sqlite_zip/utils.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""Utilities for this backend."""
+from pathlib import Path
+import tarfile
+from typing import Any, Dict, Optional, Union
+import zipfile
+
+from archive_path import read_file_in_tar, read_file_in_zip
+from sqlalchemy import event
+from sqlalchemy.future.engine import Engine, create_engine
+
+from aiida.common import json
+from aiida.common.exceptions import CorruptStorage, UnreachableStorage
+
+META_FILENAME = 'metadata.json'
+"""The filename containing meta information about the storage instance."""
+
+DB_FILENAME = 'db.sqlite3'
+"""The filename of the SQLite database."""
+
+REPO_FOLDER = 'repo'
+"""The name of the folder containing the repository files."""
+
+
+def sqlite_enforce_foreign_keys(dbapi_connection, _):
+ """Enforce foreign key constraints, when using sqlite backend (off by default)"""
+ cursor = dbapi_connection.cursor()
+ cursor.execute('PRAGMA foreign_keys=ON;')
+ cursor.close()
+
+
+def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine:
+ """Create a new engine instance."""
+ engine = create_engine(
+ f'sqlite:///{path}',
+ json_serializer=json.dumps,
+ json_deserializer=json.loads,
+ encoding='utf-8',
+ future=True,
+ **kwargs
+ )
+ if enforce_foreign_keys:
+ event.listen(engine, 'connect', sqlite_enforce_foreign_keys)
+ return engine
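+
+# Example (illustrative sketch): opening an extracted ``db.sqlite3`` for ad-hoc inspection;
+# the path and query are hypothetical.
+#
+#     from sqlalchemy import text
+#
+#     engine = create_sqla_engine('/tmp/db.sqlite3')
+#     with engine.connect() as connection:
+#         node_count = connection.execute(text('SELECT count(*) FROM db_dbnode')).scalar_one()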
+
+
+def extract_metadata(path: Union[str, Path], *, search_limit: Optional[int] = 10) -> Dict[str, Any]:
+ """Extract the metadata dictionary from the archive.
+
+ :param search_limit: the maximum number of records to search for the metadata file in a zip file.
+ """
+ path = Path(path)
+ if not path.exists():
+ raise UnreachableStorage(f'path not found: {path}')
+
+ if path.is_dir():
+ if not path.joinpath(META_FILENAME).is_file():
+ raise CorruptStorage('Could not find metadata file')
+ try:
+ metadata = json.loads(path.joinpath(META_FILENAME).read_text(encoding='utf8'))
+ except Exception as exc:
+ raise CorruptStorage(f'Could not read metadata: {exc}') from exc
+ elif path.is_file() and zipfile.is_zipfile(path):
+ try:
+ metadata = json.loads(read_file_in_zip(path, META_FILENAME, search_limit=search_limit))
+ except Exception as exc:
+ raise CorruptStorage(f'Could not read metadata: {exc}') from exc
+ elif path.is_file() and tarfile.is_tarfile(path):
+ try:
+ metadata = json.loads(read_file_in_tar(path, META_FILENAME))
+ except Exception as exc:
+ raise CorruptStorage(f'Could not read metadata: {exc}') from exc
+ else:
+ raise CorruptStorage('Path not a folder, zip or tar file')
+
+ if not isinstance(metadata, dict):
+ raise CorruptStorage(f'Metadata is not a dictionary: {type(metadata)}')
+
+ return metadata
+
+
+def read_version(path: Union[str, Path], *, search_limit: Optional[int] = None) -> str:
+ """Read the version of the storage instance from the path.
+
+ This is intended to work for all versions of the storage format.
+
+ :param path: path to storage instance, either a folder, zip file or tar file.
+ :param search_limit: the maximum number of records to search for the metadata file in a zip file.
+
+ :raises: ``UnreachableStorage`` if the file does not exist, or ``CorruptStorage`` if a version cannot be read
+ """
+ metadata = extract_metadata(path, search_limit=search_limit)
+ if 'export_version' in metadata:
+ return metadata['export_version']
+
+ raise CorruptStorage("Metadata does not contain 'export_version' key")
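These two helpers are the low-level entry points for inspecting an archive without instantiating a storage backend; a minimal sketch, with a hypothetical archive path:

```python
# Illustrative sketch only; not part of the diff.
from aiida.common.exceptions import CorruptStorage, UnreachableStorage
from aiida.storage.sqlite_zip.utils import extract_metadata, read_version

path = 'export.aiida'  # hypothetical archive path
try:
    metadata = extract_metadata(path)
    version = read_version(path)
except (UnreachableStorage, CorruptStorage) as exc:
    print(f'could not read archive: {exc}')
else:
    print(version, metadata.get('aiida_version'))
```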
diff --git a/aiida/tools/archive/__init__.py b/aiida/tools/archive/__init__.py
index 4252c80745..735e4dc43d 100644
--- a/aiida/tools/archive/__init__.py
+++ b/aiida/tools/archive/__init__.py
@@ -17,7 +17,6 @@
# pylint: disable=wildcard-import
from .abstract import *
-from .common import *
from .create import *
from .exceptions import *
from .implementations import *
@@ -28,10 +27,8 @@
'ArchiveFormatAbstract',
'ArchiveFormatSqlZip',
'ArchiveImportError',
- 'ArchiveMigrationError',
'ArchiveReaderAbstract',
'ArchiveWriterAbstract',
- 'CorruptArchive',
'EXPORT_LOGGER',
'ExportImportException',
'ExportValidationError',
@@ -39,9 +36,6 @@
'ImportTestRun',
'ImportUniquenessError',
'ImportValidationError',
- 'IncompatibleArchiveVersionError',
- 'MIGRATE_LOGGER',
- 'MigrationValidationError',
'create_archive',
'get_format',
'import_archive',
diff --git a/aiida/tools/archive/abstract.py b/aiida/tools/archive/abstract.py
index b45eded9a6..08a5cb9ad8 100644
--- a/aiida/tools/archive/abstract.py
+++ b/aiida/tools/archive/abstract.py
@@ -141,7 +141,7 @@ def __exit__(self, *args, **kwargs) -> None:
def get_metadata(self) -> Dict[str, Any]:
"""Return the top-level metadata.
- :raises: ``UnreadableArchiveError`` if the top-level metadata cannot be read from the archive
+ :raises: ``CorruptStorage`` if the top-level metadata cannot be read from the archive
"""
@abstractmethod
@@ -180,13 +180,8 @@ class ArchiveFormatAbstract(ABC):
@property
@abstractmethod
- def versions(self) -> List[str]:
- """Return ordered list of versions of the archive format, oldest -> latest."""
-
- @property
def latest_version(self) -> str:
- """Return the latest version of the archive format."""
- return self.versions[-1]
+ """Return the latest schema version of the archive format."""
@property
@abstractmethod
@@ -201,8 +196,8 @@ def read_version(self, path: Union[str, Path]) -> str:
:param path: archive path
- :raises: ``FileNotFoundError`` if the file does not exist
- :raises: ``UnreadableArchiveError`` if a version cannot be read from the archive
+ :raises: ``UnreachableStorage`` if the file does not exist
+ :raises: ``CorruptStorage`` if a version cannot be read from the archive
"""
@overload
@@ -279,13 +274,13 @@ def migrate(
"""
-def get_format(name: str = 'sqlitezip') -> ArchiveFormatAbstract:
+def get_format(name: str = 'sqlite_zip') -> ArchiveFormatAbstract:
"""Get the archive format instance.
:param name: name of the archive format
:return: archive format instance
"""
# to-do entry point for archive formats?
- assert name == 'sqlitezip'
- from aiida.tools.archive.implementations.sqlite.main import ArchiveFormatSqlZip
+ assert name == 'sqlite_zip'
+ from aiida.tools.archive.implementations.sqlite_zip.main import ArchiveFormatSqlZip
return ArchiveFormatSqlZip()
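The format name change is user-visible for anyone calling `get_format` directly; a minimal sketch of the renamed entry point (the archive path is hypothetical):

```python
# Illustrative sketch only; not part of the diff.
from aiida.tools.archive import get_format

fmt = get_format('sqlite_zip')            # 'sqlitezip' is no longer accepted
print(fmt.latest_version)                 # head schema version of the format
print(fmt.read_version('export.aiida'))   # hypothetical archive path
```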
diff --git a/aiida/tools/archive/common.py b/aiida/tools/archive/common.py
index a6bdce8094..0411dd2bcc 100644
--- a/aiida/tools/archive/common.py
+++ b/aiida/tools/archive/common.py
@@ -13,14 +13,9 @@
import urllib.parse
import urllib.request
-from aiida.common.log import AIIDA_LOGGER
from aiida.orm import AuthInfo, Comment, Computer, Entity, Group, Log, Node, User
from aiida.orm.entities import EntityTypes
-__all__ = ('MIGRATE_LOGGER',)
-
-MIGRATE_LOGGER = AIIDA_LOGGER.getChild('migrate')
-
# Mapping from entity names to AiiDA classes
entity_type_to_orm: Dict[EntityTypes, Type[Entity]] = {
EntityTypes.AUTHINFO: AuthInfo,
diff --git a/aiida/tools/archive/create.py b/aiida/tools/archive/create.py
index edd60d5132..acb5a200fe 100644
--- a/aiida/tools/archive/create.py
+++ b/aiida/tools/archive/create.py
@@ -36,7 +36,7 @@
from .abstract import ArchiveFormatAbstract, ArchiveWriterAbstract
from .common import batch_iter, entity_type_to_orm
from .exceptions import ArchiveExportError, ExportValidationError
-from .implementations.sqlite import ArchiveFormatSqlZip
+from .implementations.sqlite_zip import ArchiveFormatSqlZip
__all__ = ('create_archive', 'EXPORT_LOGGER')
@@ -281,13 +281,12 @@ def create_archive(
writer.update_metadata({
'ctime': datetime.now().isoformat(),
'creation_parameters': {
- 'entities_starting_set':
+ 'entities_starting_set': None if entities is None else
{etype.value: list(unique) for etype, unique in starting_uuids.items() if unique},
'include_authinfos': include_authinfos,
'include_comments': include_comments,
'include_logs': include_logs,
'graph_traversal_rules': full_traversal_rules,
- 'entity_counts': dict(count_summary), # type: ignore
}
})
# stream entity data to the archive
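With this change, `entities_starting_set` is recorded as `None` when a full-profile export is requested rather than an empty mapping; a minimal sketch of how the two cases arise, assuming the public `create_archive` signature with `entities` and `filename` arguments:

```python
# Illustrative sketch only; not part of the diff.
from aiida import load_profile, orm
from aiida.tools.archive import create_archive

load_profile()
group = orm.load_group('my_group')  # hypothetical group label

# explicit starting set -> recorded in creation_parameters['entities_starting_set']
create_archive([group], filename='export.aiida', include_comments=True)

# entities=None exports the whole profile and is now recorded as
# creation_parameters['entities_starting_set'] = None
create_archive(None, filename='full_export.aiida')
```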
diff --git a/aiida/tools/archive/exceptions.py b/aiida/tools/archive/exceptions.py
index 1ad358308f..05db839a36 100644
--- a/aiida/tools/archive/exceptions.py
+++ b/aiida/tools/archive/exceptions.py
@@ -19,11 +19,7 @@
'ExportImportException',
'ArchiveExportError',
'ExportValidationError',
- 'CorruptArchive',
- 'ArchiveMigrationError',
- 'MigrationValidationError',
'ArchiveImportError',
- 'IncompatibleArchiveVersionError',
'ImportValidationError',
'ImportUniquenessError',
'ImportTestRun',
@@ -42,22 +38,10 @@ class ExportValidationError(ArchiveExportError):
"""Raised when validation fails during export, e.g. for non-sealed ``ProcessNode`` s."""
-class UnreadableArchiveError(ArchiveExportError):
- """Raised when the version cannot be extracted from the archive."""
-
-
-class CorruptArchive(ExportImportException):
- """Raised when an operation is applied to a corrupt export archive, e.g. missing files or invalid formats."""
-
-
class ArchiveImportError(ExportImportException):
"""Base class for all AiiDA import exceptions."""
-class IncompatibleArchiveVersionError(ExportImportException):
- """Raised when trying to import an export archive with an incompatible schema version."""
-
-
class ImportUniquenessError(ArchiveImportError):
"""Raised when the user tries to violate a uniqueness constraint.
@@ -71,25 +55,3 @@ class ImportValidationError(ArchiveImportError):
class ImportTestRun(ArchiveImportError):
"""Raised during an import, before the transaction is commited."""
-
-
-class ArchiveMigrationError(ExportImportException):
- """Base class for all AiiDA export archive migration exceptions."""
-
-
-class MigrationValidationError(ArchiveMigrationError):
- """Raised when validation fails during migration of export archives."""
-
-
-class ReadOnlyError(IOError):
- """Raised when a write operation is called on a read-only archive."""
-
- def __init__(self, msg='Archive is read-only'): # pylint: disable=useless-super-delegation
- super().__init__(msg)
-
-
-class ArchiveClosedError(IOError):
- """Raised when the archive is closed."""
-
- def __init__(self, msg='Archive is closed'): # pylint: disable=useless-super-delegation
- super().__init__(msg)
diff --git a/aiida/tools/archive/implementations/__init__.py b/aiida/tools/archive/implementations/__init__.py
index 6f85411389..fed227acb2 100644
--- a/aiida/tools/archive/implementations/__init__.py
+++ b/aiida/tools/archive/implementations/__init__.py
@@ -14,7 +14,7 @@
# yapf: disable
# pylint: disable=wildcard-import
-from .sqlite import *
+from .sqlite_zip import *
__all__ = (
'ArchiveFormatSqlZip',
diff --git a/aiida/tools/archive/implementations/sqlite/backend.py b/aiida/tools/archive/implementations/sqlite/backend.py
deleted file mode 100644
index 934dd2bf1b..0000000000
--- a/aiida/tools/archive/implementations/sqlite/backend.py
+++ /dev/null
@@ -1,469 +0,0 @@
-# -*- coding: utf-8 -*-
-###########################################################################
-# Copyright (c), The AiiDA team. All rights reserved. #
-# This file is part of the AiiDA code. #
-# #
-# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
-# For further information on the license, see the LICENSE.txt file #
-# For further information please visit http://www.aiida.net #
-###########################################################################
-"""The table models are dynamically generated from the sqlalchemy backend models."""
-from contextlib import contextmanager
-from datetime import datetime
-from functools import singledispatch
-from pathlib import Path
-import tempfile
-from typing import BinaryIO, Iterable, Iterator, List, Optional, Sequence, Tuple, Type, cast
-import zipfile
-from zipfile import ZipFile
-
-from archive_path import extract_file_in_zip
-import pytz
-from sqlalchemy import CHAR, Text, orm, types
-from sqlalchemy.dialects.postgresql import JSONB, UUID
-from sqlalchemy.dialects.sqlite import JSON
-from sqlalchemy.sql.schema import Table
-
-from aiida.common.exceptions import UnreachableStorage
-from aiida.manage import Profile
-from aiida.orm.entities import EntityTypes
-from aiida.orm.implementation import StorageBackend
-from aiida.repository.backend.abstract import AbstractRepositoryBackend
-# we need to import all models, to ensure they are loaded on the SQLA Metadata
-from aiida.storage.psql_dos.models import authinfo, base, comment, computer, group, log, node, user
-from aiida.storage.psql_dos.orm import authinfos, comments, computers, entities, groups, logs, nodes, users
-from aiida.storage.psql_dos.orm.querybuilder import SqlaQueryBuilder
-from aiida.storage.psql_dos.orm.utils import ModelWrapper
-from aiida.tools.archive.exceptions import ArchiveClosedError, CorruptArchive, ReadOnlyError
-
-from .common import DB_FILENAME, REPO_FOLDER, create_sqla_engine
-
-
-class SqliteModel:
- """Represent a row in an sqlite database table"""
-
- def __repr__(self) -> str:
- """Return a representation of the row columns"""
- string = f'<{self.__class__.__name__}'
- for col in self.__table__.columns: # type: ignore[attr-defined] # pylint: disable=no-member
- # don't include columns with potentially large values
- if isinstance(col.type, (JSON, Text)):
- continue
- string += f' {col.name}={getattr(self, col.name)}'
- return string + '>'
-
-
-class TZDateTime(types.TypeDecorator): # pylint: disable=abstract-method
- """A timezone naive UTC ``DateTime`` implementation for SQLite.
-
- see: https://docs.sqlalchemy.org/en/14/core/custom_types.html#store-timezone-aware-timestamps-as-timezone-naive-utc
- """
- impl = types.DateTime
- cache_ok = True
-
- def process_bind_param(self, value: Optional[datetime], dialect):
- """Process before writing to database."""
- if value is None:
- return value
- if value.tzinfo is None:
- value = value.astimezone(pytz.utc)
- value = value.astimezone(pytz.utc).replace(tzinfo=None)
- return value
-
- def process_result_value(self, value: Optional[datetime], dialect):
- """Process when returning from database."""
- if value is None:
- return value
- if value.tzinfo is None:
- return value.replace(tzinfo=pytz.utc)
- return value.astimezone(pytz.utc)
-
-
-ArchiveDbBase = orm.declarative_base(cls=SqliteModel, name='SqliteModel')
-
-
-def pg_to_sqlite(pg_table: Table):
- """Convert a model intended for PostGreSQL to one compatible with SQLite"""
- new = pg_table.to_metadata(ArchiveDbBase.metadata)
- for column in new.columns:
- if isinstance(column.type, UUID):
- column.type = CHAR(32)
- elif isinstance(column.type, types.DateTime):
- column.type = TZDateTime()
- elif isinstance(column.type, JSONB):
- column.type = JSON()
- return new
-
-
-def create_orm_cls(klass: base.Base) -> ArchiveDbBase:
- """Create an ORM class from an existing table in the declarative meta"""
- tbl = ArchiveDbBase.metadata.tables[klass.__tablename__]
- return type( # type: ignore[return-value]
- klass.__name__,
- (ArchiveDbBase,),
- {
- '__tablename__': tbl.name,
- '__table__': tbl,
- **{col.name if col.name != 'metadata' else '_metadata': col for col in tbl.columns},
- },
- )
-
-
-for table in base.Base.metadata.sorted_tables:
- pg_to_sqlite(table)
-
-DbUser = create_orm_cls(user.DbUser)
-DbComputer = create_orm_cls(computer.DbComputer)
-DbAuthInfo = create_orm_cls(authinfo.DbAuthInfo)
-DbGroup = create_orm_cls(group.DbGroup)
-DbNode = create_orm_cls(node.DbNode)
-DbGroupNodes = create_orm_cls(group.DbGroupNode)
-DbComment = create_orm_cls(comment.DbComment)
-DbLog = create_orm_cls(log.DbLog)
-DbLink = create_orm_cls(node.DbLink)
-
-# to-do This was the minimum for creating a graph, but really all relationships should be copied
-DbNode.dbcomputer = orm.relationship('DbComputer', backref='dbnodes') # type: ignore[attr-defined]
-DbGroup.dbnodes = orm.relationship( # type: ignore[attr-defined]
- 'DbNode', secondary='db_dbgroup_dbnodes', backref='dbgroups', lazy='dynamic'
-)
-
-
-class ZipfileBackendRepository(AbstractRepositoryBackend):
- """A read-only backend for an open zip file."""
-
- def __init__(self, file: ZipFile):
- self._zipfile = file
-
- @property
- def zipfile(self) -> ZipFile:
- if self._zipfile.fp is None:
- raise ArchiveClosedError()
- return self._zipfile
-
- @property
- def uuid(self) -> Optional[str]:
- return None
-
- @property
- def key_format(self) -> Optional[str]:
- return 'sha256'
-
- def initialise(self, **kwargs) -> None:
- pass
-
- @property
- def is_initialised(self) -> bool:
- return True
-
- def erase(self) -> None:
- raise ReadOnlyError()
-
- def _put_object_from_filelike(self, handle: BinaryIO) -> str:
- raise ReadOnlyError()
-
- def has_object(self, key: str) -> bool:
- try:
- self.zipfile.getinfo(f'{REPO_FOLDER}/{key}')
- except KeyError:
- return False
- return True
-
- def has_objects(self, keys: List[str]) -> List[bool]:
- return [self.has_object(key) for key in keys]
-
- def list_objects(self) -> Iterable[str]:
- for name in self.zipfile.namelist():
- if name.startswith(REPO_FOLDER + '/') and name[len(REPO_FOLDER) + 1:]:
- yield name[len(REPO_FOLDER) + 1:]
-
- @contextmanager
- def open(self, key: str) -> Iterator[BinaryIO]:
- try:
- handle = self.zipfile.open(f'{REPO_FOLDER}/{key}')
- yield cast(BinaryIO, handle)
- except KeyError:
- raise FileNotFoundError(f'object with key `{key}` does not exist.')
- finally:
- handle.close()
-
- def iter_object_streams(self, keys: List[str]) -> Iterator[Tuple[str, BinaryIO]]:
- for key in keys:
- with self.open(key) as handle: # pylint: disable=not-context-manager
- yield key, handle
-
- def delete_objects(self, keys: List[str]) -> None:
- raise ReadOnlyError()
-
- def get_object_hash(self, key: str) -> str:
- return key
-
- def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
- raise NotImplementedError
-
- def get_info(self, statistics: bool = False, **kwargs) -> dict:
- return {'objects': {'count': len(list(self.list_objects()))}}
-
-
-class ArchiveBackendQueryBuilder(SqlaQueryBuilder):
- """Archive query builder"""
-
- @property
- def Node(self):
- return DbNode
-
- @property
- def Link(self):
- return DbLink
-
- @property
- def Computer(self):
- return DbComputer
-
- @property
- def User(self):
- return DbUser
-
- @property
- def Group(self):
- return DbGroup
-
- @property
- def AuthInfo(self):
- return DbAuthInfo
-
- @property
- def Comment(self):
- return DbComment
-
- @property
- def Log(self):
- return DbLog
-
- @property
- def table_groups_nodes(self):
- return DbGroupNodes.__table__ # type: ignore[attr-defined] # pylint: disable=no-member
-
-
-class ArchiveReadOnlyBackend(StorageBackend): # pylint: disable=too-many-public-methods
- """A read-only backend for the archive."""
-
- @classmethod
- def version_head(cls) -> str:
- raise NotImplementedError
-
- @classmethod
- def version_profile(cls, profile: Profile) -> None:
- raise NotImplementedError
-
- @classmethod
- def migrate(cls, profile: Profile):
- raise ReadOnlyError()
-
- def __init__(self, profile: Profile):
- super().__init__(profile)
- self._path = Path(profile.storage_config['path'])
- if not self._path.is_file():
- raise UnreachableStorage(f'archive file `{self._path}` does not exist.')
- # lazy open the archive zipfile and extract the database file
- self._db_file: Optional[Path] = None
- self._session: Optional[orm.Session] = None
- self._zipfile: Optional[zipfile.ZipFile] = None
- self._closed = False
-
- def __str__(self) -> str:
- state = 'closed' if self.is_closed else 'open'
- return f'Aiida archive (read-only) [{state}] @ {self._path}'
-
- @property
- def is_closed(self) -> bool:
- return self._closed
-
- def close(self):
- """Close the backend"""
- if self._session:
- self._session.close()
- if self._db_file and self._db_file.exists():
- self._db_file.unlink()
- if self._zipfile:
- self._zipfile.close()
- self._session = None
- self._db_file = None
- self._zipfile = None
- self._closed = True
-
- def get_session(self) -> orm.Session:
- """Return an SQLAlchemy session."""
- if self._closed:
- raise ArchiveClosedError()
- if self._db_file is None:
- _, path = tempfile.mkstemp()
- self._db_file = Path(path)
- with self._db_file.open('wb') as handle:
- try:
- extract_file_in_zip(self._path, DB_FILENAME, handle, search_limit=4)
- except Exception as exc:
- raise CorruptArchive(f'database could not be read: {exc}') from exc
- if self._session is None:
- self._session = orm.Session(create_sqla_engine(self._db_file))
- return self._session
-
- def get_repository(self) -> ZipfileBackendRepository:
- if self._closed:
- raise ArchiveClosedError()
- if self._zipfile is None:
- self._zipfile = ZipFile(self._path, mode='r') # pylint: disable=consider-using-with
- return ZipfileBackendRepository(self._zipfile)
-
- def query(self) -> ArchiveBackendQueryBuilder:
- return ArchiveBackendQueryBuilder(self)
-
- def get_backend_entity(self, res): # pylint: disable=no-self-use
- """Return the backend entity that corresponds to the given Model instance."""
- klass = get_backend_entity(res)
- return klass(self, res)
-
- @property
- def authinfos(self):
- return create_backend_collection(authinfos.SqlaAuthInfoCollection, self, authinfos.SqlaAuthInfo, DbAuthInfo)
-
- @property
- def comments(self):
- return create_backend_collection(comments.SqlaCommentCollection, self, comments.SqlaComment, DbComment)
-
- @property
- def computers(self):
- return create_backend_collection(computers.SqlaComputerCollection, self, computers.SqlaComputer, DbComputer)
-
- @property
- def groups(self):
- return create_backend_collection(groups.SqlaGroupCollection, self, groups.SqlaGroup, DbGroup)
-
- @property
- def logs(self):
- return create_backend_collection(logs.SqlaLogCollection, self, logs.SqlaLog, DbLog)
-
- @property
- def nodes(self):
- return create_backend_collection(nodes.SqlaNodeCollection, self, nodes.SqlaNode, DbNode)
-
- @property
- def users(self):
- return create_backend_collection(users.SqlaUserCollection, self, users.SqlaUser, DbUser)
-
- def _clear(self, recreate_user: bool = True) -> None:
- raise ReadOnlyError()
-
- def transaction(self):
- raise ReadOnlyError()
-
- @property
- def in_transaction(self) -> bool:
- return False
-
- def bulk_insert(self, entity_type: EntityTypes, rows: List[dict], allow_defaults: bool = False) -> List[int]:
- raise ReadOnlyError()
-
- def bulk_update(self, entity_type: EntityTypes, rows: List[dict]) -> None:
- raise ReadOnlyError()
-
- def delete_nodes_and_connections(self, pks_to_delete: Sequence[int]):
- raise ReadOnlyError()
-
- def get_global_variable(self, key: str):
- raise NotImplementedError
-
- def set_global_variable(self, key: str, value, description: Optional[str] = None, overwrite=True) -> None:
- raise ReadOnlyError()
-
- def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None:
- raise NotImplementedError
-
- def get_info(self, statistics: bool = False) -> dict:
- results = super().get_info(statistics=statistics)
- results['repository'] = self.get_repository().get_info(statistics)
- return results
-
-
-def create_backend_cls(base_class, model_cls):
- """Create an archive backend class for the given model class."""
-
- class ReadOnlyEntityBackend(base_class): # type: ignore
- """Backend class for the read-only archive."""
-
- MODEL_CLASS = model_cls
-
- def __init__(self, _backend, model):
- """Initialise the backend entity."""
- self._backend = _backend
- self._model = ModelWrapper(model, _backend)
-
- @property
- def model(self) -> ModelWrapper:
- """Return an ORM model that correctly updates and flushes the data model when getting or setting a field."""
- return self._model
-
- @property
- def bare_model(self):
- """Return the underlying SQLAlchemy ORM model for this entity."""
- return self.model._model # pylint: disable=protected-access
-
- @classmethod
- def from_dbmodel(cls, model, _backend):
- return cls(_backend, model)
-
- @property
- def is_stored(self):
- return True
-
- def store(self): # pylint: disable=no-self-use
- return ReadOnlyError()
-
- return ReadOnlyEntityBackend
-
-
-def create_backend_collection(cls, _backend, entity_cls, model):
- collection = cls(_backend)
- new_cls = create_backend_cls(entity_cls, model)
- collection.ENTITY_CLASS = new_cls
- return collection
-
-
-@singledispatch
-def get_backend_entity(dbmodel) -> Type[entities.SqlaModelEntity]: # pylint: disable=unused-argument
- raise TypeError(f'Cannot get backend entity for {dbmodel}')
-
-
-@get_backend_entity.register(DbAuthInfo) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(authinfos.SqlaAuthInfo, dbmodel.__class__)
-
-
-@get_backend_entity.register(DbComment) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(comments.SqlaComment, dbmodel.__class__)
-
-
-@get_backend_entity.register(DbComputer) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(computers.SqlaComputer, dbmodel.__class__)
-
-
-@get_backend_entity.register(DbGroup) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(groups.SqlaGroup, dbmodel.__class__)
-
-
-@get_backend_entity.register(DbLog) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(logs.SqlaLog, dbmodel.__class__)
-
-
-@get_backend_entity.register(DbNode) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(nodes.SqlaNode, dbmodel.__class__)
-
-
-@get_backend_entity.register(DbUser) # type: ignore[call-overload]
-def _(dbmodel):
- return create_backend_cls(users.SqlaUser, dbmodel.__class__)
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/main.py b/aiida/tools/archive/implementations/sqlite/migrations/main.py
deleted file mode 100644
index b0a69ac0f9..0000000000
--- a/aiida/tools/archive/implementations/sqlite/migrations/main.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# -*- coding: utf-8 -*-
-###########################################################################
-# Copyright (c), The AiiDA team. All rights reserved. #
-# This file is part of the AiiDA code. #
-# #
-# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
-# For further information on the license, see the LICENSE.txt file #
-# For further information please visit http://www.aiida.net #
-###########################################################################
-"""AiiDA archive migrator implementation."""
-from pathlib import Path
-import shutil
-import tarfile
-import tempfile
-from typing import Any, Dict, List, Optional, Union
-import zipfile
-
-from archive_path import open_file_in_tar, open_file_in_zip
-
-from aiida.common import json
-from aiida.common.progress_reporter import get_progress_reporter
-from aiida.tools.archive.common import MIGRATE_LOGGER
-from aiida.tools.archive.exceptions import ArchiveMigrationError, CorruptArchive
-
-from ..common import copy_tar_to_zip, copy_zip_to_zip
-from .legacy import FINAL_LEGACY_VERSION, LEGACY_MIGRATE_FUNCTIONS
-from .legacy_to_new import perform_v1_migration
-
-ALL_VERSIONS = ['0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10', '0.11', '0.12', '1.0']
-
-
-def migrate( # pylint: disable=too-many-branches,too-many-statements
- inpath: Union[str, Path],
- outpath: Union[str, Path],
- current_version: str,
- version: str,
- *,
- force: bool = False,
- compression: int = 6
-) -> None:
- """Migrate an archive to a specific version.
-
- :param path: archive path
- """
- inpath = Path(inpath)
- outpath = Path(outpath)
-
- if outpath.exists() and not force:
- raise IOError('Output path already exists and force=False')
- if outpath.exists() and not outpath.is_file():
- raise IOError('Existing output path is not a file')
-
- # check versions are valid
- # versions 0.1, 0.2, 0.3 are no longer supported,
- # since 0.3 -> 0.4 requires costly migrations of repo files (you would need to unpack all of them)
- if current_version in ('0.1', '0.2', '0.3') or version in ('0.1', '0.2', '0.3'):
- raise ArchiveMigrationError(
- f"Migration from '{current_version}' -> '{version}' is not supported in aiida-core v2"
- )
- if current_version not in ALL_VERSIONS:
- raise ArchiveMigrationError(f"Unknown current version '{current_version}'")
- if version not in ALL_VERSIONS:
- raise ArchiveMigrationError(f"Unknown target version '{version}'")
-
- # if we are already at the desired version, then no migration is required
- if current_version == version:
- if inpath != outpath:
- if outpath.exists() and force:
- outpath.unlink()
- shutil.copyfile(inpath, outpath)
- return
-
- # the file should be either a tar (legacy only) or zip file
- if tarfile.is_tarfile(str(inpath)):
- is_tar = True
- elif zipfile.is_zipfile(str(inpath)):
- is_tar = False
- else:
- raise CorruptArchive(f'The input file is neither a tar nor a zip file: {inpath}')
-
- # read the metadata.json which should always be present
- metadata = _read_json(inpath, 'metadata.json', is_tar)
- # data.json will only be read from legacy archives
- data: Optional[Dict[str, Any]] = None
-
- # if the archive is a "legacy" format, i.e. has a data.json file, migrate to latest one
- if current_version in LEGACY_MIGRATE_FUNCTIONS:
- MIGRATE_LOGGER.report('Legacy migrations required')
- MIGRATE_LOGGER.report('Extracting data.json ...')
- # read the data.json file
- data = _read_json(inpath, 'data.json', is_tar)
- to_version = FINAL_LEGACY_VERSION if version not in LEGACY_MIGRATE_FUNCTIONS else version
- current_version = _perform_legacy_migrations(current_version, to_version, metadata, data)
-
- if current_version == version:
- # create new legacy archive with updated metadata & data
- def path_callback(inpath, outpath) -> bool:
- if inpath.name == 'metadata.json':
- outpath.write_text(json.dumps(metadata))
- return True
- if inpath.name == 'data.json':
- outpath.write_text(json.dumps(data))
- return True
- return False
-
- func = copy_tar_to_zip if is_tar else copy_zip_to_zip
-
- func(
- inpath,
- outpath,
- path_callback,
- overwrite=force,
- compression=compression,
- title='Writing migrated legacy archive',
- info_order=('metadata.json', 'data.json')
- )
- return
-
- with tempfile.TemporaryDirectory() as tmpdirname:
-
- if current_version == FINAL_LEGACY_VERSION:
- MIGRATE_LOGGER.report('aiida-core v1 -> v2 migration required')
- if data is None:
- MIGRATE_LOGGER.report('Extracting data.json ...')
- data = _read_json(inpath, 'data.json', is_tar)
- current_version = perform_v1_migration(
- inpath, Path(tmpdirname), 'new.zip', is_tar, metadata, data, compression
- )
-
- if not current_version == version:
- raise ArchiveMigrationError(f"Migration from '{current_version}' -> '{version}' failed")
-
- if outpath.exists() and force:
- outpath.unlink()
- shutil.move(Path(tmpdirname) / 'new.zip', outpath) # type: ignore[arg-type]
-
-
-def _read_json(inpath: Path, filename: str, is_tar: bool) -> Dict[str, Any]:
- """Read a JSON file from the archive."""
- if is_tar:
- with open_file_in_tar(inpath, filename) as handle:
- data = json.load(handle)
- else:
- with open_file_in_zip(inpath, filename) as handle:
- data = json.load(handle)
- return data
-
-
-def _perform_legacy_migrations(current_version: str, to_version: str, metadata: dict, data: dict) -> str:
- """Perform legacy migrations from the current version to the desired version.
-
- Legacy archives use the old ``data.json`` format for storing the database.
- These migrations simply manipulate the metadata and data in-place.
-
- :param current_version: current version of the archive
- :param to_version: version to migrate to
- :param metadata: the metadata to migrate
- :param data: the data to migrate
- :return: the new version of the archive
- """
- # compute the migration pathway
- prev_version = current_version
- pathway: List[str] = []
- while prev_version != to_version:
- if prev_version not in LEGACY_MIGRATE_FUNCTIONS:
- raise ArchiveMigrationError(f"No migration pathway available for '{current_version}' to '{to_version}'")
- if prev_version in pathway:
- raise ArchiveMigrationError(
- f'cyclic migration pathway encountered: {" -> ".join(pathway + [prev_version])}'
- )
- pathway.append(prev_version)
- prev_version = LEGACY_MIGRATE_FUNCTIONS[prev_version][0]
-
- if not pathway:
- MIGRATE_LOGGER.report('No migration required')
- return to_version
-
- MIGRATE_LOGGER.report('Legacy migration pathway: %s', ' -> '.join(pathway + [to_version]))
-
- with get_progress_reporter()(total=len(pathway), desc='Performing migrations: ') as progress:
- for from_version in pathway:
- to_version = LEGACY_MIGRATE_FUNCTIONS[from_version][0]
- progress.set_description_str(f'Performing migrations: {from_version} -> {to_version}', refresh=True)
- LEGACY_MIGRATE_FUNCTIONS[from_version][1](metadata, data)
- progress.update()
-
- return to_version
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/utils.py b/aiida/tools/archive/implementations/sqlite/migrations/utils.py
deleted file mode 100644
index e769de1bd4..0000000000
--- a/aiida/tools/archive/implementations/sqlite/migrations/utils.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# -*- coding: utf-8 -*-
-###########################################################################
-# Copyright (c), The AiiDA team. All rights reserved. #
-# This file is part of the AiiDA code. #
-# #
-# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
-# For further information on the license, see the LICENSE.txt file #
-# For further information please visit http://www.aiida.net #
-###########################################################################
-"""Utility functions for migration of export-files."""
-
-from aiida.tools.archive import exceptions
-
-
-def verify_metadata_version(metadata, version=None):
- """Utility function to verify that the metadata has the correct version number.
-
- If no version number is passed, it will just extract the version number and return it.
-
- :param metadata: the content of an export archive metadata.json file
- :param version: string version number that the metadata is expected to have
- """
- try:
- metadata_version = metadata['export_version']
- except KeyError:
- raise exceptions.ArchiveMigrationError("metadata is missing the 'export_version' key")
-
- if version is None:
- return metadata_version
-
- if metadata_version != version:
- raise exceptions.MigrationValidationError(
- f'expected archive file with version {version} but found version {metadata_version}'
- )
-
- return None
-
-
-def update_metadata(metadata, version):
- """Update the metadata with a new version number and a notification of the conversion that was executed.
-
- :param metadata: the content of an export archive metadata.json file
- :param version: string version number that the updated metadata should get
- """
- from aiida import get_version
-
- old_version = metadata['export_version']
- conversion_info = metadata.get('conversion_info', [])
-
- conversion_message = f'Converted from version {old_version} to {version} with AiiDA v{get_version()}'
- conversion_info.append(conversion_message)
-
- metadata['aiida_version'] = get_version()
- metadata['export_version'] = version
- metadata['conversion_info'] = conversion_info
-
-
-def remove_fields(metadata, data, entities, fields):
- """Remove fields under entities from data.json and metadata.json.
-
- :param metadata: the content of an export archive metadata.json file
- :param data: the content of an export archive data.json file
- :param entities: list of ORM entities
- :param fields: list of fields to be removed from the export archive files
- """
- # data.json
- for entity in entities:
- for content in data['export_data'].get(entity, {}).values():
- for field in fields:
- content.pop(field, None)
-
- # metadata.json
- for entity in entities:
- for field in fields:
- metadata['all_fields_info'][entity].pop(field, None)
diff --git a/aiida/tools/archive/implementations/sqlite/migrations/v1_db_schema.py b/aiida/tools/archive/implementations/sqlite/migrations/v1_db_schema.py
deleted file mode 100644
index 30baf8709f..0000000000
--- a/aiida/tools/archive/implementations/sqlite/migrations/v1_db_schema.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# -*- coding: utf-8 -*-
-###########################################################################
-# Copyright (c), The AiiDA team. All rights reserved. #
-# This file is part of the AiiDA code. #
-# #
-# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
-# For further information on the license, see the LICENSE.txt file #
-# For further information please visit http://www.aiida.net #
-###########################################################################
-"""This is the sqlite DB schema, coresponding to the 34a831f4286d main DB revision.
-
-For normal operation of the archive,
-we auto-generate the schema from the models in ``aiida.storage.psql_dos.models``.
-However, when migrating an archive from the old format, we require a fixed revision of the schema.
-
-The only difference between the PostGreSQL schema and SQLite one,
-is the replacement of ``JSONB`` with ``JSON``, and ``UUID`` with ``CHAR(36)``.
-"""
-from sqlalchemy import ForeignKey, orm
-from sqlalchemy.dialects.sqlite import JSON
-from sqlalchemy.schema import Column, Index, UniqueConstraint
-from sqlalchemy.types import CHAR, Boolean, DateTime, Integer, String, Text
-
-ArchiveV1Base = orm.declarative_base()
-
-
-class DbAuthInfo(ArchiveV1Base):
- """Class that keeps the authernification data."""
-
- __tablename__ = 'db_dbauthinfo'
- __table_args__ = (UniqueConstraint('aiidauser_id', 'dbcomputer_id'),)
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- aiidauser_id = Column(
- Integer, ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED')
- )
- dbcomputer_id = Column(
- Integer, ForeignKey('db_dbcomputer.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED')
- )
- _metadata = Column('metadata', JSON)
- auth_params = Column(JSON)
- enabled = Column(Boolean, default=True)
-
-
-class DbComment(ArchiveV1Base):
- """Class to store comments."""
-
- __tablename__ = 'db_dbcomment'
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- uuid = Column(CHAR(36), unique=True)
- dbnode_id = Column(Integer, ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'))
- ctime = Column(DateTime(timezone=True))
- mtime = Column(DateTime(timezone=True))
- user_id = Column(Integer, ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'))
- content = Column(Text, nullable=True)
-
-
-class DbComputer(ArchiveV1Base):
- """Class to store computers."""
- __tablename__ = 'db_dbcomputer'
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- uuid = Column(CHAR(36), unique=True)
- label = Column(String(255), unique=True, nullable=False)
- hostname = Column(String(255))
- description = Column(Text, nullable=True)
- scheduler_type = Column(String(255))
- transport_type = Column(String(255))
- _metadata = Column('metadata', JSON)
-
-
-class DbGroupNodes(ArchiveV1Base):
- """Class to store join table for group -> nodes."""
-
- __tablename__ = 'db_dbgroup_dbnodes'
- __table_args__ = (UniqueConstraint('dbgroup_id', 'dbnode_id', name='db_dbgroup_dbnodes_dbgroup_id_dbnode_id_key'),)
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- dbnode_id = Column(Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'))
- dbgroup_id = Column(Integer, ForeignKey('db_dbgroup.id', deferrable=True, initially='DEFERRED'))
-
-
-class DbGroup(ArchiveV1Base):
- """Class to store groups."""
-
- __tablename__ = 'db_dbgroup'
- __table_args__ = (UniqueConstraint('label', 'type_string'),)
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- uuid = Column(CHAR(36), unique=True)
- label = Column(String(255), index=True)
- type_string = Column(String(255), default='', index=True)
- time = Column(DateTime(timezone=True))
- description = Column(Text, nullable=True)
- extras = Column(JSON, default=dict, nullable=False)
- user_id = Column(Integer, ForeignKey('db_dbuser.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'))
-
- Index('db_dbgroup_dbnodes_dbnode_id_idx', DbGroupNodes.dbnode_id)
- Index('db_dbgroup_dbnodes_dbgroup_id_idx', DbGroupNodes.dbgroup_id)
-
-
-class DbLog(ArchiveV1Base):
- """Class to store logs."""
-
- __tablename__ = 'db_dblog'
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- uuid = Column(CHAR(36), unique=True)
- time = Column(DateTime(timezone=True))
- loggername = Column(String(255), index=True)
- levelname = Column(String(255), index=True)
- dbnode_id = Column(
- Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED', ondelete='CASCADE'), nullable=False
- )
- message = Column(Text(), nullable=True)
- _metadata = Column('metadata', JSON)
-
-
-class DbNode(ArchiveV1Base):
- """Class to store nodes."""
-
- __tablename__ = 'db_dbnode'
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- uuid = Column(CHAR(36), unique=True)
- node_type = Column(String(255), index=True)
- process_type = Column(String(255), index=True)
- label = Column(String(255), index=True, nullable=True, default='')
- description = Column(Text(), nullable=True, default='')
- ctime = Column(DateTime(timezone=True))
- mtime = Column(DateTime(timezone=True))
- attributes = Column(JSON)
- extras = Column(JSON)
- repository_metadata = Column(JSON, nullable=False, default=dict, server_default='{}')
- dbcomputer_id = Column(
- Integer,
- ForeignKey('db_dbcomputer.id', deferrable=True, initially='DEFERRED', ondelete='RESTRICT'),
- nullable=True
- )
- user_id = Column(
- Integer, ForeignKey('db_dbuser.id', deferrable=True, initially='DEFERRED', ondelete='restrict'), nullable=False
- )
-
-
-class DbLink(ArchiveV1Base):
- """Class to store links between nodes."""
-
- __tablename__ = 'db_dblink'
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- input_id = Column(Integer, ForeignKey('db_dbnode.id', deferrable=True, initially='DEFERRED'), index=True)
- output_id = Column(
- Integer, ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), index=True
- )
- label = Column(String(255), index=True, nullable=False)
- type = Column(String(255), index=True)
-
-
-class DbUser(ArchiveV1Base):
- """Class to store users."""
-
- __tablename__ = 'db_dbuser'
-
- id = Column(Integer, primary_key=True) # pylint: disable=invalid-name
- email = Column(String(254), unique=True, index=True)
- first_name = Column(String(254), nullable=True)
- last_name = Column(String(254), nullable=True)
- institution = Column(String(254), nullable=True)
diff --git a/aiida/tools/archive/implementations/sqlite/reader.py b/aiida/tools/archive/implementations/sqlite/reader.py
deleted file mode 100644
index f3cdebbe74..0000000000
--- a/aiida/tools/archive/implementations/sqlite/reader.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# -*- coding: utf-8 -*-
-###########################################################################
-# Copyright (c), The AiiDA team. All rights reserved. #
-# This file is part of the AiiDA code. #
-# #
-# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
-# For further information on the license, see the LICENSE.txt file #
-# For further information please visit http://www.aiida.net #
-###########################################################################
-"""AiiDA archive reader implementation."""
-import json
-from pathlib import Path
-import tarfile
-from typing import Any, Dict, Optional, Union
-import zipfile
-
-from archive_path import read_file_in_tar, read_file_in_zip
-
-from aiida.manage import Profile
-from aiida.tools.archive.abstract import ArchiveReaderAbstract
-from aiida.tools.archive.exceptions import CorruptArchive, UnreadableArchiveError
-
-from . import backend as db
-from .common import META_FILENAME
-
-
-class ArchiveReaderSqlZip(ArchiveReaderAbstract):
- """An archive reader for the SQLite format."""
-
- def __init__(self, path: Union[str, Path], **kwargs: Any):
- super().__init__(path, **kwargs)
- self._in_context = False
- # we lazily create the storage backend, then clean up on exit
- self._backend: Optional[db.ArchiveReadOnlyBackend] = None
-
- def __enter__(self) -> 'ArchiveReaderSqlZip':
- self._in_context = True
- return self
-
- def __exit__(self, *args, **kwargs) -> None:
- """Close the archive backend."""
- super().__exit__(*args, **kwargs)
- if self._backend:
- self._backend.close()
- self._backend = None
- self._in_context = False
-
- def get_metadata(self) -> Dict[str, Any]:
- try:
- return extract_metadata(self.path)
- except Exception as exc:
- raise CorruptArchive('metadata could not be read') from exc
-
- def get_backend(self) -> db.ArchiveReadOnlyBackend:
- if not self._in_context:
- raise AssertionError('Not in context')
- if self._backend is not None:
- return self._backend
- profile = Profile(
- 'default', {
- 'storage': {
- 'backend': 'archive.sqlite',
- 'config': {
- 'path': str(self.path)
- }
- },
- 'process_control': {
- 'backend': 'null',
- 'config': {}
- }
- }
- )
- self._backend = db.ArchiveReadOnlyBackend(profile)
- return self._backend
-
-
-def extract_metadata(path: Union[str, Path], search_limit: Optional[int] = 10) -> Dict[str, Any]:
- """Extract the metadata dictionary from the archive"""
- # we fail if not one of the first record in central directory (as expected)
- # so we don't have to iter all repo files to fail
- return json.loads(read_file_in_zip(path, META_FILENAME, 'utf8', search_limit=search_limit))
-
-
-def read_version(path: Union[str, Path]) -> str:
- """Read the version of the archive from the file.
-
- Intended to work for all versions of the archive format.
-
- :param path: archive path
-
- :raises: ``FileNotFoundError`` if the file does not exist
- :raises: ``UnreadableArchiveError`` if a version cannot be read from the archive
- """
- path = Path(path)
- if not path.is_file():
- raise FileNotFoundError('archive file not found')
- # check the file is at least a zip or tar file
- if zipfile.is_zipfile(path):
- try:
- metadata = extract_metadata(path, search_limit=None)
- except Exception as exc:
- raise UnreadableArchiveError(f'Could not read metadata for version: {exc}') from exc
- elif tarfile.is_tarfile(path):
- try:
- metadata = json.loads(read_file_in_tar(path, META_FILENAME))
- except Exception as exc:
- raise UnreadableArchiveError(f'Could not read metadata for version: {exc}') from exc
- else:
- raise UnreadableArchiveError('Not a zip or tar file')
- if 'export_version' in metadata:
- return metadata['export_version']
- raise UnreadableArchiveError("Metadata does not contain 'export_version' key")
diff --git a/aiida/tools/archive/implementations/sqlite/__init__.py b/aiida/tools/archive/implementations/sqlite_zip/__init__.py
similarity index 100%
rename from aiida/tools/archive/implementations/sqlite/__init__.py
rename to aiida/tools/archive/implementations/sqlite_zip/__init__.py
diff --git a/aiida/tools/archive/implementations/sqlite/main.py b/aiida/tools/archive/implementations/sqlite_zip/main.py
similarity index 88%
rename from aiida/tools/archive/implementations/sqlite/main.py
rename to aiida/tools/archive/implementations/sqlite_zip/main.py
index 85b1242991..a86dc5dff1 100644
--- a/aiida/tools/archive/implementations/sqlite/main.py
+++ b/aiida/tools/archive/implementations/sqlite_zip/main.py
@@ -9,12 +9,13 @@
###########################################################################
"""The file format implementation"""
from pathlib import Path
-from typing import Any, List, Literal, Union, overload
+from typing import Any, Literal, Union, overload
+from aiida.storage.sqlite_zip.migrator import get_schema_version_head, migrate
+from aiida.storage.sqlite_zip.utils import read_version
from aiida.tools.archive.abstract import ArchiveFormatAbstract
-from .migrations.main import ALL_VERSIONS, migrate
-from .reader import ArchiveReaderSqlZip, read_version
+from .reader import ArchiveReaderSqlZip
from .writer import ArchiveAppenderSqlZip, ArchiveWriterSqlZip
__all__ = ('ArchiveFormatSqlZip',)
@@ -36,8 +37,8 @@ class ArchiveFormatSqlZip(ArchiveFormatAbstract):
"""
@property
- def versions(self) -> List[str]:
- return ALL_VERSIONS
+ def latest_version(self) -> str:
+ return get_schema_version_head()
def read_version(self, path: Union[str, Path]) -> str:
return read_version(path)
@@ -106,5 +107,4 @@ def migrate(
:param path: archive path
"""
- current_version = self.read_version(inpath)
- return migrate(inpath, outpath, current_version, version, force=force, compression=compression)
+ return migrate(inpath, outpath, version, force=force, compression=compression)
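With the migration logic moved to `aiida.storage.sqlite_zip.migrator`, the format class no longer reads the current version itself before delegating; a minimal sketch of driving a migration through it (paths are hypothetical, keyword names follow the hunk above):

```python
# Illustrative sketch only; not part of the diff.
from aiida.tools.archive.implementations.sqlite_zip.main import ArchiveFormatSqlZip

fmt = ArchiveFormatSqlZip()
fmt.migrate('old_export.aiida', 'migrated.aiida', fmt.latest_version, force=True)
```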
diff --git a/aiida/tools/archive/implementations/sqlite_zip/reader.py b/aiida/tools/archive/implementations/sqlite_zip/reader.py
new file mode 100644
index 0000000000..e5b73c18e4
--- /dev/null
+++ b/aiida/tools/archive/implementations/sqlite_zip/reader.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""AiiDA archive reader implementation."""
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from aiida.common.exceptions import CorruptStorage
+from aiida.storage.sqlite_zip.backend import SqliteZipBackend
+from aiida.storage.sqlite_zip.utils import extract_metadata
+from aiida.tools.archive.abstract import ArchiveReaderAbstract
+
+
+class ArchiveReaderSqlZip(ArchiveReaderAbstract):
+ """An archive reader for the SQLite format."""
+
+ def __init__(self, path: Union[str, Path], **kwargs: Any):
+ super().__init__(path, **kwargs)
+ self._in_context = False
+ # we lazily create the storage backend, then clean up on exit
+ self._backend: Optional[SqliteZipBackend] = None
+
+ def __enter__(self) -> 'ArchiveReaderSqlZip':
+ self._in_context = True
+ return self
+
+ def __exit__(self, *args, **kwargs) -> None:
+ """Close the archive backend."""
+ super().__exit__(*args, **kwargs)
+ if self._backend:
+ self._backend.close()
+ self._backend = None
+ self._in_context = False
+
+ def get_metadata(self) -> Dict[str, Any]:
+ try:
+ return extract_metadata(self.path)
+ except Exception as exc:
+ raise CorruptStorage('metadata could not be read') from exc
+
+ def get_backend(self) -> SqliteZipBackend:
+ if not self._in_context:
+ raise AssertionError('Not in context')
+ if self._backend is not None:
+ return self._backend
+ profile = SqliteZipBackend.create_profile(self.path)
+ self._backend = SqliteZipBackend(profile)
+ return self._backend
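A minimal sketch of the reader above, with a hypothetical archive path; `get_backend` now returns the read-only `SqliteZipBackend` rather than the `ArchiveReadOnlyBackend` deleted from `implementations/sqlite/backend.py`:

```python
# Illustrative sketch only; not part of the diff.
from aiida.tools.archive.implementations.sqlite_zip.reader import ArchiveReaderSqlZip

with ArchiveReaderSqlZip('export.aiida') as reader:  # hypothetical path
    print(reader.get_metadata().get('export_version'))
    backend = reader.get_backend()
    # query `backend` here; it is closed when the context exits
```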
diff --git a/aiida/tools/archive/implementations/sqlite/writer.py b/aiida/tools/archive/implementations/sqlite_zip/writer.py
similarity index 86%
rename from aiida/tools/archive/implementations/sqlite/writer.py
rename to aiida/tools/archive/implementations/sqlite_zip/writer.py
index 4ee7358c84..2e4315b1da 100644
--- a/aiida/tools/archive/implementations/sqlite/writer.py
+++ b/aiida/tools/archive/implementations/sqlite_zip/writer.py
@@ -9,7 +9,6 @@
###########################################################################
"""AiiDA archive writer implementation."""
from datetime import datetime
-import functools
import hashlib
from io import BytesIO
import json
@@ -20,46 +19,24 @@
import zipfile
from archive_path import NOTSET, ZipPath, extract_file_in_zip, read_file_in_zip
-from sqlalchemy import insert, inspect
+from sqlalchemy import insert
from sqlalchemy.exc import IntegrityError as SqlaIntegrityError
from sqlalchemy.future.engine import Connection
from aiida import get_version
-from aiida.common.exceptions import IntegrityError
+from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, IntegrityError
from aiida.common.hashing import chunked_file_hash
from aiida.common.progress_reporter import get_progress_reporter
from aiida.orm.entities import EntityTypes
+from aiida.storage.sqlite_zip import models, utils
from aiida.tools.archive.abstract import ArchiveFormatAbstract, ArchiveWriterAbstract
-from aiida.tools.archive.exceptions import CorruptArchive, IncompatibleArchiveVersionError
-
-from . import backend as db
-from .common import DB_FILENAME, META_FILENAME, REPO_FOLDER, create_sqla_engine
-
-
-@functools.lru_cache(maxsize=10)
-def _get_model_from_entity(entity_type: EntityTypes):
- """Return the Sqlalchemy model and column names corresponding to the given entity."""
- model = {
- EntityTypes.USER: db.DbUser,
- EntityTypes.AUTHINFO: db.DbAuthInfo,
- EntityTypes.GROUP: db.DbGroup,
- EntityTypes.NODE: db.DbNode,
- EntityTypes.COMMENT: db.DbComment,
- EntityTypes.COMPUTER: db.DbComputer,
- EntityTypes.LOG: db.DbLog,
- EntityTypes.LINK: db.DbLink,
- EntityTypes.GROUP_NODE: db.DbGroupNodes
- }[entity_type]
- mapper = inspect(model).mapper
- column_names = {col.name for col in mapper.c.values()}
- return model, column_names
class ArchiveWriterSqlZip(ArchiveWriterAbstract):
"""AiiDA archive writer implementation."""
- meta_name = META_FILENAME
- db_name = DB_FILENAME
+ meta_name = utils.META_FILENAME
+ db_name = utils.DB_FILENAME
def __init__(
self,
@@ -106,10 +83,10 @@ def __enter__(self) -> 'ArchiveWriterSqlZip':
info_order=(self.meta_name, self.db_name),
name_to_info=self._central_dir,
)
- engine = create_sqla_engine(
+ engine = utils.create_sqla_engine(
self._work_dir / self.db_name, enforce_foreign_keys=self._enforce_foreign_keys, echo=self._debug
)
- db.ArchiveDbBase.metadata.create_all(engine)
+ models.SqliteBase.metadata.create_all(engine)
self._conn = engine.connect()
self._in_context = True
return self
@@ -150,7 +127,7 @@ def bulk_insert(
return
self._assert_in_context()
assert self._conn is not None
- model, col_keys = _get_model_from_entity(entity_type)
+ model, col_keys = models.get_model_from_entity(entity_type)
if allow_defaults:
for row in rows:
if not col_keys.issuperset(row):
@@ -197,8 +174,8 @@ def put_object(self, stream: BinaryIO, *, buffer_size: Optional[int] = None, key
if key is None:
key = chunked_file_hash(stream, hashlib.sha256)
stream.seek(0)
- if f'{REPO_FOLDER}/{key}' not in self._central_dir:
- self._stream_binary(f'{REPO_FOLDER}/{key}', stream, buffer_size=buffer_size)
+ if f'{utils.REPO_FOLDER}/{key}' not in self._central_dir:
+ self._stream_binary(f'{utils.REPO_FOLDER}/{key}', stream, buffer_size=buffer_size)
return key
def delete_object(self, key: str) -> None:
@@ -210,9 +187,9 @@ class ArchiveAppenderSqlZip(ArchiveWriterSqlZip):
def delete_object(self, key: str) -> None:
self._assert_in_context()
- if f'{REPO_FOLDER}/{key}' in self._central_dir:
+ if f'{utils.REPO_FOLDER}/{key}' in self._central_dir:
raise IOError(f'Cannot delete object {key!r} that has been added in the same append context')
- self._deleted_paths.add(f'{REPO_FOLDER}/{key}')
+ self._deleted_paths.add(f'{utils.REPO_FOLDER}/{key}')
def __enter__(self) -> 'ArchiveAppenderSqlZip':
"""Start appending to the archive"""
@@ -222,11 +199,11 @@ def __enter__(self) -> 'ArchiveAppenderSqlZip':
# the file should be an archive with the correct version
version = self._format.read_version(self._path)
if not version == self._format.latest_version:
- raise IncompatibleArchiveVersionError(
+ raise IncompatibleStorageSchema(
f'Archive is version {version!r} but expected {self._format.latest_version!r}'
)
# load the metadata
- self._metadata = json.loads(read_file_in_zip(self._path, META_FILENAME, 'utf8', search_limit=4))
+ self._metadata = json.loads(read_file_in_zip(self._path, utils.META_FILENAME, 'utf8', search_limit=4))
# overwrite metadata
self._metadata['mtime'] = datetime.now().isoformat()
self._metadata['compression'] = self._compression
@@ -247,11 +224,11 @@ def __enter__(self) -> 'ArchiveAppenderSqlZip':
db_file = self._work_dir / self.db_name
with db_file.open('wb') as handle:
try:
- extract_file_in_zip(self.path, DB_FILENAME, handle, search_limit=4)
+ extract_file_in_zip(self.path, utils.DB_FILENAME, handle, search_limit=4)
except Exception as exc:
- raise CorruptArchive(f'database could not be read: {exc}') from exc
+ raise CorruptStorage(f'archive database could not be read: {exc}') from exc
# open a connection to the database
- engine = create_sqla_engine(
+ engine = utils.create_sqla_engine(
self._work_dir / self.db_name, enforce_foreign_keys=self._enforce_foreign_keys, echo=self._debug
)
# to-do could check that the database has correct schema:
diff --git a/aiida/tools/archive/imports.py b/aiida/tools/archive/imports.py
index 7c0b002bf9..f5b0f332e6 100644
--- a/aiida/tools/archive/imports.py
+++ b/aiida/tools/archive/imports.py
@@ -16,6 +16,7 @@
from aiida import orm
from aiida.common import timezone
+from aiida.common.exceptions import IncompatibleStorageSchema
from aiida.common.lang import type_check
from aiida.common.links import LinkType
from aiida.common.log import AIIDA_LOGGER
@@ -28,8 +29,8 @@
from .abstract import ArchiveFormatAbstract
from .common import batch_iter, entity_type_to_orm
-from .exceptions import ImportTestRun, ImportUniquenessError, ImportValidationError, IncompatibleArchiveVersionError
-from .implementations.sqlite import ArchiveFormatSqlZip
+from .exceptions import ImportTestRun, ImportUniquenessError, ImportValidationError
+from .implementations.sqlite_zip import ArchiveFormatSqlZip
__all__ = ('IMPORT_LOGGER', 'import_archive')
@@ -95,10 +96,9 @@ def import_archive(
:returns: Primary Key of the import Group
- :raises `~aiida.tools.archive.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
- version is not equal to the version of AiiDA at the moment of import.
- :raises `~aiida.tools.archive.exceptions.ImportValidationError`: if parameters or the contents of
- :raises `~aiida.tools.archive.exceptions.CorruptArchive`: if the provided archive cannot be read.
+ :raises `~aiida.common.exceptions.CorruptStorage`: if the provided archive cannot be read.
+ :raises `~aiida.common.exceptions.IncompatibleStorageSchema`: if the archive version is not at head.
+ :raises `~aiida.tools.archive.exceptions.ImportValidationError`: if invalid entities are found in the archive.
:raises `~aiida.tools.archive.exceptions.ImportUniquenessError`: if a new unique entity can not be created.
"""
archive_format = archive_format or ArchiveFormatSqlZip()
@@ -126,9 +126,9 @@ def import_archive(
# i.e. its not whether the version is the latest that matters, it is that it is compatible with the backend version
# its a bit weird at the moment because django/sqlalchemy have different versioning
if not archive_format.read_version(path) == archive_format.latest_version:
- raise IncompatibleArchiveVersionError(
- f'The archive version {archive_format.read_version(path)} '
- f'is not the latest version {archive_format.latest_version}'
+ raise IncompatibleStorageSchema(
+ f'The archive version {archive_format.read_version(path)!r} '
+ f'is not the latest version {archive_format.latest_version!r}'
)
IMPORT_LOGGER.report(
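
The hunk above switches `import_archive` to the generic storage exceptions. As a hedged, illustrative sketch only (not part of this patch), downstream code could react to the new behaviour roughly as follows; the helper name and the output filename are made up, while `import_archive`, `ArchiveFormatSqlZip`, its `migrate` method and the exception classes all appear in the hunks of this diff:

    from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema
    from aiida.tools.archive import ArchiveFormatSqlZip, import_archive

    def import_with_fallback_migration(path, migrated_path='migrated.aiida'):
        """Import an archive, first migrating it to the latest schema if needed (illustrative only)."""
        archive_format = ArchiveFormatSqlZip()
        try:
            return import_archive(path, archive_format=archive_format)
        except IncompatibleStorageSchema:
            # the archive is at an older schema version: migrate it to a new file, then retry the import
            archive_format.migrate(path, migrated_path, archive_format.latest_version)
            return import_archive(migrated_path, archive_format=archive_format)
        except CorruptStorage as exc:
            raise RuntimeError(f'archive could not be read: {exc}') from exc
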
diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions
index bc87aa31aa..0464c9e354 100644
--- a/docs/source/nitpick-exceptions
+++ b/docs/source/nitpick-exceptions
@@ -52,6 +52,7 @@ py:class SelfType
py:class Profile
py:class PsqlDosBackend
py:class str | list[str]
+py:class str | Path
### AiiDA
@@ -70,7 +71,6 @@ py:class aiida.tools.groups.paths.WalkNodeResult
py:meth aiida.orm.groups.GroupCollection.delete
py:class AbstractRepositoryBackend
-py:class Backend
py:class BackendEntity
py:class BackendEntityType
py:class BackendNode
@@ -116,6 +116,7 @@ py:class ReturnType
py:class Runner
py:class Scheduler
py:class SelfType
+py:class StorageBackend
py:class TransactionType
py:class Transport
py:class TransportQueue
@@ -127,6 +128,7 @@ py:class orm.implementation.Backend
py:class aiida.common.exceptions.UnreachableStorage
py:class aiida.common.exceptions.IncompatibleDatabaseSchema
py:class aiida.common.exceptions.DatabaseMigrationError
+py:class aiida.storage.sqlite_zip.models.DbGroupNode
py:class AuthInfoCollection
py:class CommentCollection
diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst
index 7043a13b40..a01950b053 100644
--- a/docs/source/reference/command_line.rst
+++ b/docs/source/reference/command_line.rst
@@ -25,10 +25,11 @@ Below is a list with all available subcommands.
--help Show this message and exit.
Commands:
- create Write subsets of the provenance graph to a single file.
- import Import data from an AiiDA archive file.
- inspect Inspect contents of an archive without importing it.
- migrate Migrate an export archive to a more recent format version.
+ create Create an archive from all or part of a profile's data.
+ import Import archived data to a profile.
+ info Summarise the contents of an archive.
+ migrate Migrate an archive to a more recent schema version.
+ version Print the current version of an archive's schema.
.. _reference:command-line:verdi-calcjob:
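
For orientation only (not part of the patch), the two new subcommands listed above can be driven programmatically through click's test runner, mirroring how the test suite later in this diff invokes them; the archive filename is hypothetical:

    from click.testing import CliRunner

    from aiida.cmdline.commands import cmd_archive

    runner = CliRunner()
    # print the latest schema version and the schema version of the given archive
    print(runner.invoke(cmd_archive.archive_version, ['my_archive.aiida']).output)
    # summarise the archive contents, including detailed statistics
    print(runner.invoke(cmd_archive.archive_info, ['--statistics', 'my_archive.aiida']).output)
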
diff --git a/tests/cmdline/commands/test_archive_create.py b/tests/cmdline/commands/test_archive_create.py
index 51a2229ce0..9f4ebf5cb9 100644
--- a/tests/cmdline/commands/test_archive_create.py
+++ b/tests/cmdline/commands/test_archive_create.py
@@ -7,7 +7,7 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
-"""Tests for `verdi export`."""
+"""Tests for `verdi archive`."""
import shutil
import zipfile
@@ -15,6 +15,7 @@
from aiida.cmdline.commands import cmd_archive
from aiida.orm import Code, Computer, Dict, Group
+from aiida.storage.sqlite_zip.migrator import list_versions
from aiida.tools.archive import ArchiveFormatSqlZip
from tests.utils.archives import get_archive_file
@@ -85,10 +86,10 @@ def test_create_basic(run_cli_command, tmp_path):
assert archive.querybuilder().append(Dict, project=['uuid']).all(flat=True) == [node.uuid]
-@pytest.mark.parametrize('version', ArchiveFormatSqlZip().versions[:-1])
+@pytest.mark.parametrize('version', ('0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10', '0.11', '0.12'))
def test_migrate_versions_old(run_cli_command, tmp_path, version):
"""Migrating archives with a version older than the current should work."""
- archive = f'export_v{version}_simple.aiida'
+ archive = f'export_{version}_simple.aiida'
filename_input = get_archive_file(archive, filepath='export/migrate')
filename_output = tmp_path / 'archive.aiida'
@@ -100,7 +101,7 @@ def test_migrate_versions_old(run_cli_command, tmp_path, version):
def test_migrate_version_specific(run_cli_command, tmp_path):
"""Test the `-v/--version` option to migrate to a specific version instead of the latest."""
- archive = 'export_v0.5_simple.aiida'
+ archive = 'export_0.5_simple.aiida'
target_version = '0.8'
filename_input = get_archive_file(archive, filepath='export/migrate')
filename_output = tmp_path / 'archive.aiida'
@@ -117,7 +118,7 @@ def test_migrate_file_already_exists(run_cli_command, tmp_path):
"""Test that using a file that already exists will raise."""
outpath = tmp_path / 'archive.aiida'
outpath.touch()
- filename_input = get_archive_file('export_v0.6_simple.aiida', filepath='export/migrate')
+ filename_input = get_archive_file('export_0.6_simple.aiida', filepath='export/migrate')
options = [filename_input, outpath]
run_cli_command(cmd_archive.migrate, options, raises=True)
@@ -126,7 +127,7 @@ def test_migrate_force(run_cli_command, tmp_path):
"""Test that using a file that already exists will work when the ``-f/--force`` parameter is used."""
outpath = tmp_path / 'archive.aiida'
outpath.touch()
- filename_input = get_archive_file('export_v0.6_simple.aiida', filepath='export/migrate')
+ filename_input = get_archive_file('export_0.6_simple.aiida', filepath='export/migrate')
options = ['--force', filename_input, outpath]
run_cli_command(cmd_archive.migrate, options)
assert ArchiveFormatSqlZip().read_version(outpath) == ArchiveFormatSqlZip().latest_version
@@ -134,7 +135,7 @@ def test_migrate_force(run_cli_command, tmp_path):
def test_migrate_in_place(run_cli_command, tmp_path):
"""Test that passing the -i/--in-place option will overwrite the passed file."""
- archive = 'export_v0.6_simple.aiida'
+ archive = 'export_0.6_simple.aiida'
target_version = '0.8'
filename_input = get_archive_file(archive, filepath='export/migrate')
filename_clone = tmp_path / 'archive.aiida'
@@ -166,7 +167,7 @@ def test_migrate_low_verbosity(run_cli_command, tmp_path):
Note that we use the ``config_with_profile`` fixture to create a dummy profile, since the ``--verbosity`` option
will change the profile configuration which could potentially influence the other tests.
"""
- filename_input = get_archive_file('export_v0.6_simple.aiida', filepath='export/migrate')
+ filename_input = get_archive_file('export_0.6_simple.aiida', filepath='export/migrate')
filename_output = tmp_path / 'archive.aiida'
options = ['--verbosity', 'WARNING', filename_input, filename_output]
@@ -177,36 +178,36 @@ def test_migrate_low_verbosity(run_cli_command, tmp_path):
assert ArchiveFormatSqlZip().read_version(filename_output) == ArchiveFormatSqlZip().latest_version
-@pytest.mark.parametrize('version', ArchiveFormatSqlZip().versions)
-def test_inspect_version(run_cli_command, version):
- """Test the functionality of `verdi export inspect --version`."""
- archive = f'export_v{version}_simple.aiida'
+@pytest.mark.parametrize('version', [v for v in list_versions() if v not in ('main_0000a', 'main_0000b')])
+def test_version(run_cli_command, version):
+ """Test the functionality of `verdi archive version`."""
+ archive = f'export_{version}_simple.aiida'
filename_input = get_archive_file(archive, filepath='export/migrate')
- options = ['--version', filename_input]
- result = run_cli_command(cmd_archive.inspect, options)
- assert result.output.strip() == f'{version}'
+ options = [filename_input]
+ result = run_cli_command(cmd_archive.archive_version, options)
+ assert version in result.output
-def test_inspect_metadata(run_cli_command):
- """Test the functionality of `verdi export inspect --meta-data`."""
- archive = f'export_v{ArchiveFormatSqlZip().latest_version}_simple.aiida'
+def test_info(run_cli_command):
+ """Test the functionality of `verdi archive info`."""
+ archive = f'export_{ArchiveFormatSqlZip().latest_version}_simple.aiida'
filename_input = get_archive_file(archive, filepath='export/migrate')
- options = ['--meta-data', filename_input]
- result = run_cli_command(cmd_archive.inspect, options)
+ options = [filename_input]
+ result = run_cli_command(cmd_archive.archive_info, options)
assert 'export_version' in result.output
-def test_inspect_database(run_cli_command):
- """Test the functionality of `verdi export inspect --meta-data`."""
- archive = f'export_v{ArchiveFormatSqlZip().latest_version}_simple.aiida'
+def test_info_detailed(run_cli_command):
+ """Test the functionality of `verdi archive info --statistics`."""
+ archive = f'export_{ArchiveFormatSqlZip().latest_version}_simple.aiida'
filename_input = get_archive_file(archive, filepath='export/migrate')
- options = ['--database', filename_input]
- result = run_cli_command(cmd_archive.inspect, options)
+ options = ['--statistics', filename_input]
+ result = run_cli_command(cmd_archive.archive_info, options)
assert 'Nodes:' in result.output
-def test_inspect_empty_archive(run_cli_command):
- """Test the functionality of `verdi export inspect` for an empty archive."""
+def test_info_empty_archive(run_cli_command):
+ """Test the functionality of `verdi archive info` for an empty archive."""
filename_input = get_archive_file('empty.aiida', filepath='export/migrate')
- result = run_cli_command(cmd_archive.inspect, [filename_input], raises=True)
- assert 'archive file of unknown format' in result.output
+ result = run_cli_command(cmd_archive.archive_info, [filename_input], raises=True)
+ assert 'archive file unreadable' in result.output
diff --git a/tests/cmdline/commands/test_archive_import.py b/tests/cmdline/commands/test_archive_import.py
index 7ea347e882..ddad778313 100644
--- a/tests/cmdline/commands/test_archive_import.py
+++ b/tests/cmdline/commands/test_archive_import.py
@@ -14,9 +14,12 @@
from aiida.cmdline.commands import cmd_archive
from aiida.orm import Group
+from aiida.storage.sqlite_zip.migrator import list_versions
from aiida.tools.archive import ArchiveFormatSqlZip
from tests.utils.archives import get_archive_file
+ARCHIVE_PATH = 'export/migrate'
+
class TestVerdiImport:
"""Tests for `verdi import`."""
@@ -29,8 +32,7 @@ def init_cls(self, aiida_profile_clean): # pylint: disable=unused-argument
# Helper variables
self.url_path = 'https://raw.githubusercontent.com/aiidateam/aiida-core/' \
'0599dabf0887bee172a04f308307e99e3c3f3ff2/aiida/backends/tests/fixtures/export/migrate/'
- self.archive_path = 'export/migrate'
- self.newest_archive = f'export_v{ArchiveFormatSqlZip().latest_version}_simple.aiida'
+ self.newest_archive = f'export_{ArchiveFormatSqlZip().latest_version}_simple.aiida'
def test_import_no_archives(self):
"""Test that passing no valid archives will lead to command failure."""
@@ -55,7 +57,7 @@ def test_import_archive(self):
"""
archives = [
get_archive_file('arithmetic.add.aiida', filepath='calcjob'),
- get_archive_file(self.newest_archive, filepath=self.archive_path)
+ get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)
]
options = [] + archives
@@ -71,7 +73,7 @@ def test_import_to_group(self):
"""
archives = [
get_archive_file('arithmetic.add.aiida', filepath='calcjob'),
- get_archive_file(self.newest_archive, filepath=self.archive_path)
+ get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)
]
group_label = 'import_madness'
@@ -115,7 +117,7 @@ def test_import_make_new_group(self):
"""Make sure imported entities are saved in new Group"""
# Initialization
group_label = 'new_group_for_verdi_import'
- archives = [get_archive_file(self.newest_archive, filepath=self.archive_path)]
+ archives = [get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)]
# Check Group does not already exist
group_search = Group.objects.find(filters={'label': group_label})
@@ -134,7 +136,7 @@ def test_import_make_new_group(self):
def test_no_import_group(self):
"""Test '--import-group/--no-import-group' options."""
- archives = [get_archive_file(self.newest_archive, filepath=self.archive_path)]
+ archives = [get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)]
assert Group.objects.count() == 0, 'There should be no Groups.'
@@ -165,27 +167,13 @@ def test_no_import_group(self):
@pytest.mark.skip('Due to summary being logged, this can not be checked against `results.output`.') # pylint: disable=not-callable
def test_comment_mode(self):
"""Test toggling comment mode flag"""
- archives = [get_archive_file(self.newest_archive, filepath=self.archive_path)]
+ archives = [get_archive_file(self.newest_archive, filepath=ARCHIVE_PATH)]
for mode in ['leave', 'newest', 'overwrite']:
options = ['--comment-mode', mode] + archives
result = self.cli_runner.invoke(cmd_archive.import_archive, options)
assert result.exception is None, result.output
assert result.exit_code == 0, result.output
- def test_import_old_local_archives(self):
- """ Test import of old local archives
- Expected behavior: Automatically migrate to newest version and import correctly.
- """
- for version in ArchiveFormatSqlZip().versions:
- archive, version = (f'export_v{version}_simple.aiida', f'{version}')
- options = [get_archive_file(archive, filepath=self.archive_path)]
- result = self.cli_runner.invoke(cmd_archive.import_archive, options)
-
- assert result.exception is None, result.output
- assert result.exit_code == 0, result.output
- assert version in result.output, result.exception
- assert f'Success: imported archive {options[0]}' in result.output, result.exception
-
def test_import_old_url_archives(self):
""" Test import of old URL archives
Expected behavior: Automatically migrate to newest version and import correctly.
@@ -207,8 +195,8 @@ def test_import_url_and_local_archives(self):
local_archive = self.newest_archive
options = [
- get_archive_file(local_archive, filepath=self.archive_path), self.url_path + url_archive,
- get_archive_file(local_archive, filepath=self.archive_path)
+ get_archive_file(local_archive, filepath=ARCHIVE_PATH), self.url_path + url_archive,
+ get_archive_file(local_archive, filepath=ARCHIVE_PATH)
]
result = self.cli_runner.invoke(cmd_archive.import_archive, options)
@@ -243,7 +231,7 @@ def test_migration(self):
`migration` = True (default), Expected: No query, migrate
`migration` = False, Expected: No query, no migrate
"""
- archive = get_archive_file('export_v0.4_simple.aiida', filepath=self.archive_path)
+ archive = get_archive_file('export_0.4_simple.aiida', filepath=ARCHIVE_PATH)
success_message = f'Success: imported archive {archive}'
# Import "normally", but explicitly specifying `--migration`, make sure confirm message is present
@@ -267,3 +255,17 @@ def test_migration(self):
assert 'trying migration' not in result.output, result.exception
assert success_message not in result.output, result.exception
+
+
+@pytest.mark.usefixtures('aiida_profile_clean')
+@pytest.mark.parametrize('version', [v for v in list_versions() if v not in ('main_0000a', 'main_0000b')])
+def test_import_old_local_archives(version, run_cli_command):
+ """ Test import of old local archives
+ Expected behavior: Automatically migrate to newest version and import correctly.
+ """
+ archive, version = (f'export_{version}_simple.aiida', f'{version}')
+ options = [get_archive_file(archive, filepath=ARCHIVE_PATH)]
+ result = run_cli_command(cmd_archive.import_archive, options)
+
+ assert version in result.output, result.exception
+ assert f'Success: imported archive {options[0]}' in result.output, result.exception
diff --git a/tests/fixtures/export/migrate/export_v0.10_simple.aiida b/tests/fixtures/export/migrate/export_v0.10_simple.aiida
deleted file mode 100644
index dbeea937c1..0000000000
Binary files a/tests/fixtures/export/migrate/export_v0.10_simple.aiida and /dev/null differ
diff --git a/tests/static/calcjob/arithmetic.add.aiida b/tests/static/calcjob/arithmetic.add.aiida
index 9166ab33c4..5fdfab9548 100644
Binary files a/tests/static/calcjob/arithmetic.add.aiida and b/tests/static/calcjob/arithmetic.add.aiida differ
diff --git a/tests/static/calcjob/arithmetic.add_old.aiida b/tests/static/calcjob/arithmetic.add_old.aiida
index 7c3c1f985a..b5e5b01959 100644
Binary files a/tests/static/calcjob/arithmetic.add_old.aiida and b/tests/static/calcjob/arithmetic.add_old.aiida differ
diff --git a/tests/static/calcjob/container/config.json b/tests/static/calcjob/container/config.json
deleted file mode 100644
index 453ca5cce8..0000000000
--- a/tests/static/calcjob/container/config.json
+++ /dev/null
@@ -1 +0,0 @@
-{"container_version": 1, "loose_prefix_len": 2, "pack_size_target": 4294967296, "hash_type": "sha256", "container_id": "036ea0341e05499d9abde1cde49ee4ce", "compression_algorithm": "zlib+1"}
\ No newline at end of file
diff --git a/tests/static/calcjob/container/loose/04/bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b b/tests/static/calcjob/container/loose/04/bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b
deleted file mode 100644
index ee6f127f39..0000000000
Binary files a/tests/static/calcjob/container/loose/04/bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b and /dev/null differ
diff --git a/tests/static/calcjob/container/loose/33/7b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81 b/tests/static/calcjob/container/loose/33/7b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81
deleted file mode 100644
index 654d526942..0000000000
--- a/tests/static/calcjob/container/loose/33/7b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81
+++ /dev/null
@@ -1 +0,0 @@
-2 3
diff --git a/tests/static/calcjob/container/loose/59/ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372 b/tests/static/calcjob/container/loose/59/ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372
deleted file mode 100644
index 6a5c41cdf4..0000000000
--- a/tests/static/calcjob/container/loose/59/ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372
+++ /dev/null
@@ -1 +0,0 @@
-{"uuid": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "codes_info": [{"cmdline_params": ["/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh", "-in", "aiida.in"], "stdout_name": "aiida.out", "code_uuid": "8052fd27-f3ee-46cb-b23e-4ce5e446483e"}], "retrieve_list": ["aiida.out", "_scheduler-stdout.txt", "_scheduler-stderr.txt"], "local_copy_list": [], "remote_copy_list": []}
\ No newline at end of file
diff --git a/tests/static/calcjob/container/loose/95/f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532 b/tests/static/calcjob/container/loose/95/f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532
deleted file mode 100644
index cc936f6430..0000000000
--- a/tests/static/calcjob/container/loose/95/f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532
+++ /dev/null
@@ -1 +0,0 @@
-{"shebang": "#!/bin/bash", "submit_as_hold": false, "rerunnable": false, "job_environment": {}, "job_name": "aiida-30", "sched_output_path": "_scheduler-stdout.txt", "sched_error_path": "_scheduler-stderr.txt", "sched_join_files": false, "prepend_text": "", "append_text": "", "job_resource": {"num_cores_per_machine": null, "num_cores_per_mpiproc": null, "num_machines": 1, "num_mpiprocs_per_machine": 1}, "codes_info": [{"cmdline_params": ["/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh", "-in", "aiida.in"], "stdout_name": "aiida.out", "code_uuid": "8052fd27-f3ee-46cb-b23e-4ce5e446483e"}], "codes_run_mode": 0, "import_sys_environment": true}
\ No newline at end of file
diff --git a/tests/static/calcjob/container/loose/d3/e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e b/tests/static/calcjob/container/loose/d3/e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e
deleted file mode 100644
index 2012bdf8a1..0000000000
--- a/tests/static/calcjob/container/loose/d3/e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-exec > _scheduler-stdout.txt
-exec 2> _scheduler-stderr.txt
-
-
-'/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh' '-in' 'aiida.in' > 'aiida.out'
diff --git a/tests/static/calcjob/container/loose/e1/49222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879 b/tests/static/calcjob/container/loose/e1/49222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879
deleted file mode 100644
index 524686cd87..0000000000
Binary files a/tests/static/calcjob/container/loose/e1/49222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879 and /dev/null differ
diff --git a/tests/static/calcjob/container/loose/e3/b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 b/tests/static/calcjob/container/loose/e3/b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/static/calcjob/container/loose/f0/b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06 b/tests/static/calcjob/container/loose/f0/b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06
deleted file mode 100644
index 7ed6ff82de..0000000000
--- a/tests/static/calcjob/container/loose/f0/b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06
+++ /dev/null
@@ -1 +0,0 @@
-5
diff --git a/tests/static/calcjob/container/packs.idx b/tests/static/calcjob/container/packs.idx
deleted file mode 100644
index e47a083397..0000000000
Binary files a/tests/static/calcjob/container/packs.idx and /dev/null differ
diff --git a/tests/static/calcjob/container/packs/0 b/tests/static/calcjob/container/packs/0
deleted file mode 100644
index 5bb5ef7be5..0000000000
Binary files a/tests/static/calcjob/container/packs/0 and /dev/null differ
diff --git a/tests/static/calcjob/data.json b/tests/static/calcjob/data.json
deleted file mode 100644
index 0212ee0df8..0000000000
--- a/tests/static/calcjob/data.json
+++ /dev/null
@@ -1 +0,0 @@
-{"links_uuid": [{"input": "8052fd27-f3ee-46cb-b23e-4ce5e446483e", "label": "code", "type": "input_calc", "output": "9d3fda4f-6782-4441-a276-b8965aa3f97f"}, {"input": "4ab65a6b-d784-44d7-9739-498a1dc1d062", "label": "x", "type": "input_calc", "output": "9d3fda4f-6782-4441-a276-b8965aa3f97f"}, {"input": "becfc71f-fa63-433f-9919-1493f2e1cc76", "label": "y", "type": "input_calc", "output": "9d3fda4f-6782-4441-a276-b8965aa3f97f"}, {"input": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "label": "sum", "type": "create", "output": "6ab73adc-625c-4158-aad4-4d67ea9b57b8"}, {"input": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "label": "retrieved", "type": "create", "output": "2f8fe6c7-1312-4908-9fa0-0dac95c1823d"}, {"input": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "label": "remote_folder", "type": "create", "output": "2b216c5e-951b-43d5-b66f-5469efac5374"}], "export_data": {"Node": {"24": {"ctime": "2019-04-09T14:07:07.435307", "description": "Add number together", "dbcomputer": 1, "process_type": null, "label": "arithmetic.add", "node_type": "data.core.code.Code.", "user": 2, "mtime": "2019-04-09T14:07:07.685356", "uuid": "8052fd27-f3ee-46cb-b23e-4ce5e446483e"}, "25": {"ctime": "2019-04-09T14:09:15.953832", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.int.Int.", "user": 2, "mtime": "2019-04-09T14:09:15.957547", "uuid": "becfc71f-fa63-433f-9919-1493f2e1cc76"}, "26": {"ctime": "2019-04-09T14:09:22.401149", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.int.Int.", "user": 2, "mtime": "2019-04-09T14:09:22.402785", "uuid": "4ab65a6b-d784-44d7-9739-498a1dc1d062"}, "33": {"ctime": "2019-04-09T14:31:58.884999", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.int.Int.", "user": 2, "mtime": "2019-04-09T14:31:58.935272", "uuid": "6ab73adc-625c-4158-aad4-4d67ea9b57b8"}, "32": {"ctime": "2019-04-09T14:31:58.700914", "description": "", "dbcomputer": null, "process_type": null, "label": "", "node_type": "data.core.folder.FolderData.", "user": 2, "mtime": "2019-04-09T14:31:58.750472", "uuid": "2f8fe6c7-1312-4908-9fa0-0dac95c1823d", "repository_metadata": {"o": {"gzipped_data": {"k": "e149222b6bf7570a66c6d9d63c5304c00bf94a8e6b6a0db33c940d1f49667879"}, "_scheduler-stdout.txt": {"k": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, "_scheduler-stderr.txt": {"k": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, "aiida.out": {"k": "f0b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06"}}}}, "31": {"ctime": "2019-04-09T14:31:51.914077", "description": "", "dbcomputer": 1, "process_type": null, "label": "", "node_type": "data.core.remote.RemoteData.", "user": 2, "mtime": "2019-04-09T14:31:51.944102", "uuid": "2b216c5e-951b-43d5-b66f-5469efac5374"}, "30": {"ctime": "2019-04-09T14:31:49.024479", "description": "", "dbcomputer": 1, "process_type": "aiida.calculations:arithmetic.add", "label": "", "node_type": "process.calculation.calcjob.CalcJobNode.", "user": 2, "mtime": "2019-04-09T14:31:58.993336", "uuid": "9d3fda4f-6782-4441-a276-b8965aa3f97f", "repository_metadata": {"o": {".aiida": {"o": {"job_tmpl.json": {"k": "95f819ef2ea203bed2cacaf64df320ca1ce7d4a4d3a58f4f7920487a5ec7f532"}, "calcinfo.json": {"k": "59ad1048cf9741febe6085cdbd021d8395e9ef993fec33cbb6c34a73d6cf5372"}}}, "aiida.in": {"k": "337b794ce718a09a620090d53541c3b4640a64133bbee2188444810cd3169f81"}, "in_gzipped_data": {"k": 
"04bd777eeb8fb55b05d1ab72180cb56463c2897c7ff6fb93a5ebc5d64162e15b"}, "_aiidasubmit.sh": {"k": "d3e14d6651a535b4689e0605c6b814f542bf6bb88d29ac4213e7bf6afce0501e"}}}}}, "Computer": {"1": {"uuid": "b20faf4c-1475-42a4-bac7-cf9ff0eb6799", "hostname": "localhost", "transport_type": "local", "metadata": {"workdir": "/scratch/candersen/aiida/", "shebang": "#!/bin/bash", "prepend_text": "", "mpirun_command": [], "default_mpiprocs_per_machine": 1, "append_text": ""}, "scheduler_type": "direct", "description": "localhost", "label": "localhost"}}, "User": {"2": {"first_name": "Casper", "last_name": "Andersen", "institution": "EPFL", "email": "casper.andersen@epfl.ch"}}}, "node_attributes": {"24": {"is_local": false, "append_text": "", "remote_exec_path": "/home/candersen/virtualenv/new_tests/aiida/aiida_core/.ci/add.sh", "prepend_text": "", "input_plugin": "arithmetic.add"}, "25": {"value": 3}, "26": {"value": 2}, "33": {"value": 5}, "32": {}, "31": {"remote_path": "/scratch/candersen/aiida/9d/3f/da4f-6782-4441-a276-b8965aa3f97f"}, "30": {"import_sys_environment": true, "process_state": "finished", "exit_status": 0, "output_filename": "aiida.out", "scheduler_state": "done", "custom_scheduler_commands": "", "input_filename": "aiida.in", "job_id": "27752", "prepend_text": "", "remote_workdir": "/scratch/candersen/aiida/9d/3f/da4f-6782-4441-a276-b8965aa3f97f", "append_text": "", "sealed": true, "retrieve_singlefile_list": [], "resources": {"num_machines": 1, "num_mpiprocs_per_machine": 1, "default_mpiprocs_per_machine": 1}, "retrieve_list": ["aiida.out", "_scheduler-stdout.txt", "_scheduler-stderr.txt"], "retrieve_temporary_list": [], "scheduler_lastchecktime": "2019-04-09T14:31:56.534041+00:00", "mpirun_extra_params": [], "scheduler_stderr": "_scheduler-stderr.txt", "parser_name": "arithmetic.add", "withmpi": false, "environment_variables": {}, "process_label": "ArithmeticAddCalculation", "scheduler_stdout": "_scheduler-stdout.txt"}}, "node_extras": {"24": {"_aiida_hash": "669f3bac68d2edba2f8772d536d7ced8d3a481d9dc96beaa70cbdb51f85a7713", "hidden": false}, "25": {"_aiida_hash": "8f1442527a4ce25bf6c8234f04301d3e3da9c0cf8d77cbffa973fd9b59a89b7b"}, "26": {"_aiida_hash": "10540cd8422b78634ef7853859b1b9494793b6cd37bf8c2fe6c63ee229d986fe"}, "33": {"_aiida_hash": "3d5cf2c3e2b56c24b5410b1f7666974fc1795f2c425390f4e9b00e47a8707e29"}, "32": {"_aiida_hash": "83d8e6d169a5c7625c371ea8695f0d38b568a38c3a43ce220c5407c947afc026"}, "31": {"_aiida_hash": "13bfaf327a85cc6d8436754fe702a6ffb2dcdbac611b3241c4e0082424b61443"}, "30": {"_aiida_hash": "dbf0f929ed4f14bcd542306563c03338d1e6ddd704341938dcf3beca4171fd5d"}}, "groups_uuid": {}}
\ No newline at end of file
diff --git a/tests/static/calcjob/metadata.json b/tests/static/calcjob/metadata.json
deleted file mode 100644
index 605c496389..0000000000
--- a/tests/static/calcjob/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"conversion_info": ["Converted from version 0.4 to 0.5 with AiiDA v1.0.0b3", "Converted from version 0.5 to 0.6 with AiiDA v1.0.0b4", "Converted from version 0.6 to 0.7 with AiiDA v1.0.0b4", "Converted from version 0.7 to 0.8 with AiiDA v1.0.0", "Converted from version 0.8 to 0.9 with AiiDA v1.1.1", "Converted from version 0.9 to 0.10 with AiiDA v1.4.2", "Converted from version 0.10 to 0.11 with AiiDA v1.5.2", "Converted from version 0.11 to 0.12 with AiiDA v1.6.2", "Converted from version 0.12 to 0.13 with AiiDA v2.0.0a1"], "export_version": "0.13", "aiida_version": "2.0.0a1", "unique_identifiers": {"Node": "uuid", "Comment": "uuid", "Group": "uuid", "Log": "uuid", "Computer": "uuid", "User": "email"}, "all_fields_info": {"Node": {"ctime": {"convert_type": "date"}, "description": {}, "dbcomputer": {"related_name": "dbnodes", "requires": "Computer"}, "process_type": {}, "label": {}, "node_type": {}, "user": {"related_name": "dbnodes", "requires": "User"}, "mtime": {"convert_type": "date"}, "uuid": {}, "attributes": {"convert_type": "jsonb"}, "extras": {"convert_type": "jsonb"}, "repository_metadata": {}}, "Comment": {"dbnode": {"related_name": "dbcomments", "requires": "Node"}, "uuid": {}, "content": {}, "user": {"related_name": "dbcomments", "requires": "User"}, "mtime": {"convert_type": "date"}, "ctime": {"convert_type": "date"}}, "Group": {"type_string": {}, "uuid": {}, "label": {}, "user": {"related_name": "dbgroups", "requires": "User"}, "time": {"convert_type": "date"}, "description": {}, "extras": {"convert_type": "jsonb"}}, "Log": {"dbnode": {"related_name": "dblogs", "requires": "Node"}, "uuid": {}, "loggername": {}, "time": {"convert_type": "date"}, "message": {}, "levelname": {}, "metadata": {}}, "Computer": {"uuid": {}, "hostname": {}, "transport_type": {}, "metadata": {}, "scheduler_type": {}, "description": {}, "label": {}}, "User": {"first_name": {}, "last_name": {}, "institution": {}, "email": {}}}}
\ No newline at end of file
diff --git a/tests/static/export/compare/django.aiida b/tests/static/export/compare/django.aiida
index 254e73e62d..08a035840e 100644
Binary files a/tests/static/export/compare/django.aiida and b/tests/static/export/compare/django.aiida differ
diff --git a/tests/static/export/compare/sqlalchemy.aiida b/tests/static/export/compare/sqlalchemy.aiida
index bd78b641e2..810d73ee4d 100644
Binary files a/tests/static/export/compare/sqlalchemy.aiida and b/tests/static/export/compare/sqlalchemy.aiida differ
diff --git a/tests/static/export/migrate/0.10_dangling_link.aiida b/tests/static/export/migrate/0.10_dangling_link.aiida
new file mode 100644
index 0000000000..7bb644795f
Binary files /dev/null and b/tests/static/export/migrate/0.10_dangling_link.aiida differ
diff --git a/tests/static/export/migrate/0.10_null_fields.aiida b/tests/static/export/migrate/0.10_null_fields.aiida
new file mode 100644
index 0000000000..59d1c07dbc
Binary files /dev/null and b/tests/static/export/migrate/0.10_null_fields.aiida differ
diff --git a/tests/static/export/migrate/0.10_unknown_nodes_in_group.aiida b/tests/static/export/migrate/0.10_unknown_nodes_in_group.aiida
new file mode 100644
index 0000000000..460d73e276
Binary files /dev/null and b/tests/static/export/migrate/0.10_unknown_nodes_in_group.aiida differ
diff --git a/tests/static/export/migrate/export_v0.10_simple.aiida b/tests/static/export/migrate/export_0.10_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.10_simple.aiida
rename to tests/static/export/migrate/export_0.10_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.11_simple.aiida b/tests/static/export/migrate/export_0.11_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.11_simple.aiida
rename to tests/static/export/migrate/export_0.11_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.12_simple.aiida b/tests/static/export/migrate/export_0.12_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.12_simple.aiida
rename to tests/static/export/migrate/export_0.12_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.1_simple.aiida b/tests/static/export/migrate/export_0.1_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.1_simple.aiida
rename to tests/static/export/migrate/export_0.1_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.2_simple.aiida b/tests/static/export/migrate/export_0.2_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.2_simple.aiida
rename to tests/static/export/migrate/export_0.2_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.2_simple.tar.gz b/tests/static/export/migrate/export_0.2_simple.tar.gz
similarity index 100%
rename from tests/static/export/migrate/export_v0.2_simple.tar.gz
rename to tests/static/export/migrate/export_0.2_simple.tar.gz
diff --git a/tests/static/export/migrate/export_v0.3_simple.aiida b/tests/static/export/migrate/export_0.3_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.3_simple.aiida
rename to tests/static/export/migrate/export_0.3_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.4_no_Nodes.aiida b/tests/static/export/migrate/export_0.4_no_Nodes.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.4_no_Nodes.aiida
rename to tests/static/export/migrate/export_0.4_no_Nodes.aiida
diff --git a/tests/static/export/migrate/export_v0.4_simple.aiida b/tests/static/export/migrate/export_0.4_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.4_simple.aiida
rename to tests/static/export/migrate/export_0.4_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.4_simple.tar.gz b/tests/static/export/migrate/export_0.4_simple.tar.gz
similarity index 100%
rename from tests/static/export/migrate/export_v0.4_simple.tar.gz
rename to tests/static/export/migrate/export_0.4_simple.tar.gz
diff --git a/tests/static/export/migrate/export_v0.5_simple.aiida b/tests/static/export/migrate/export_0.5_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.5_simple.aiida
rename to tests/static/export/migrate/export_0.5_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.6_simple.aiida b/tests/static/export/migrate/export_0.6_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.6_simple.aiida
rename to tests/static/export/migrate/export_0.6_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.7_simple.aiida b/tests/static/export/migrate/export_0.7_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.7_simple.aiida
rename to tests/static/export/migrate/export_0.7_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.8_simple.aiida b/tests/static/export/migrate/export_0.8_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.8_simple.aiida
rename to tests/static/export/migrate/export_0.8_simple.aiida
diff --git a/tests/static/export/migrate/export_v0.9_simple.aiida b/tests/static/export/migrate/export_0.9_simple.aiida
similarity index 100%
rename from tests/static/export/migrate/export_v0.9_simple.aiida
rename to tests/static/export/migrate/export_0.9_simple.aiida
diff --git a/tests/static/export/migrate/export_main_0000_simple.aiida b/tests/static/export/migrate/export_main_0000_simple.aiida
new file mode 100644
index 0000000000..b24b062b11
Binary files /dev/null and b/tests/static/export/migrate/export_main_0000_simple.aiida differ
diff --git a/tests/static/export/migrate/export_v1.0_simple.aiida b/tests/static/export/migrate/export_main_0001_simple.aiida
similarity index 81%
rename from tests/static/export/migrate/export_v1.0_simple.aiida
rename to tests/static/export/migrate/export_main_0001_simple.aiida
index af95d66902..2c41a71f5a 100644
Binary files a/tests/static/export/migrate/export_v1.0_simple.aiida and b/tests/static/export/migrate/export_main_0001_simple.aiida differ
diff --git a/tests/tools/archive/migration/conftest.py b/tests/tools/archive/migration/conftest.py
index a6b9bccc4d..ef4ee36d0a 100644
--- a/tests/tools/archive/migration/conftest.py
+++ b/tests/tools/archive/migration/conftest.py
@@ -10,7 +10,7 @@
"""Module with tests for export archive migrations."""
import pytest
-from aiida.tools.archive.implementations.sqlite.migrations.utils import verify_metadata_version
+from aiida.storage.sqlite_zip.migrations.utils import verify_metadata_version
from tests.utils.archives import get_archive_file, read_json_files
diff --git a/tests/tools/archive/migration/test_legacy_funcs.py b/tests/tools/archive/migration/test_legacy_funcs.py
index 79aba89ab0..ba576f9c76 100644
--- a/tests/tools/archive/migration/test_legacy_funcs.py
+++ b/tests/tools/archive/migration/test_legacy_funcs.py
@@ -12,8 +12,8 @@
import pytest
from aiida import get_version
-from aiida.tools.archive.implementations.sqlite.migrations.legacy import LEGACY_MIGRATE_FUNCTIONS
-from aiida.tools.archive.implementations.sqlite.migrations.utils import verify_metadata_version
+from aiida.storage.sqlite_zip.migrations.legacy import LEGACY_MIGRATE_FUNCTIONS
+from aiida.storage.sqlite_zip.migrations.utils import verify_metadata_version
from tests.utils.archives import get_archive_file, read_json_files
@@ -26,13 +26,13 @@ def test_migrations(migration_data):
"""Test each migration method from the `aiida.tools.archive.archive.migrations` module."""
version_old, (version_new, migration_method) = migration_data
- filepath_archive_new = get_archive_file(f'export_v{version_new}_simple.aiida', filepath='export/migrate')
+ filepath_archive_new = get_archive_file(f'export_{version_new}_simple.aiida', filepath='export/migrate')
metadata_new = read_json_files(filepath_archive_new, names=['metadata.json'])[0]
verify_metadata_version(metadata_new, version=version_new)
data_new = read_json_files(filepath_archive_new, names=['data.json'])[0]
- filepath_archive_old = get_archive_file(f'export_v{version_old}_simple.aiida', filepath='export/migrate')
+ filepath_archive_old = get_archive_file(f'export_{version_old}_simple.aiida', filepath='export/migrate')
metadata_old, data_old = read_json_files(filepath_archive_old, names=['metadata.json', 'data.json']) # pylint: disable=unbalanced-tuple-unpacking
diff --git a/tests/tools/archive/migration/test_migration.py b/tests/tools/archive/migration/test_legacy_migrations.py
similarity index 92%
rename from tests/tools/archive/migration/test_migration.py
rename to tests/tools/archive/migration/test_legacy_migrations.py
index c998e1504f..9479117920 100644
--- a/tests/tools/archive/migration/test_migration.py
+++ b/tests/tools/archive/migration/test_legacy_migrations.py
@@ -12,7 +12,8 @@
import pytest
from aiida import orm
-from aiida.tools.archive import ArchiveFormatSqlZip, ArchiveMigrationError
+from aiida.common.exceptions import StorageMigrationError
+from aiida.tools.archive import ArchiveFormatSqlZip
from tests.utils.archives import get_archive_file
# archives to test migration against
@@ -47,7 +48,7 @@
}
-@pytest.mark.parametrize('archive_name', ('export_v0.4_simple.aiida', 'export_v0.4_simple.tar.gz'))
+@pytest.mark.parametrize('archive_name', ('export_0.4_simple.aiida', 'export_0.4_simple.tar.gz'))
def test_full_migration(tmp_path, core_archive, archive_name):
"""Test a migration from the first to newest archive version."""
@@ -77,17 +78,17 @@ def test_full_migration(tmp_path, core_archive, archive_name):
def test_partial_migrations(core_archive, tmp_path):
"""Test migrations from a specific version (0.5) to other versions."""
- filepath_archive = get_archive_file('export_v0.5_simple.aiida', **core_archive)
+ filepath_archive = get_archive_file('export_0.5_simple.aiida', **core_archive)
archive_format = ArchiveFormatSqlZip()
assert archive_format.read_version(filepath_archive) == '0.5'
new_archive = tmp_path / 'out.aiida'
- with pytest.raises(ArchiveMigrationError, match='Unknown target version'):
+ with pytest.raises(StorageMigrationError, match='Unknown target version'):
archive_format.migrate(filepath_archive, new_archive, 0.2)
- with pytest.raises(ArchiveMigrationError, match='No migration pathway available'):
+ with pytest.raises(StorageMigrationError, match='No migration pathway available'):
archive_format.migrate(filepath_archive, new_archive, '0.4')
archive_format.migrate(filepath_archive, new_archive, '0.7')
@@ -96,7 +97,7 @@ def test_partial_migrations(core_archive, tmp_path):
def test_no_node_migration(tmp_path, core_archive):
"""Test migration of archive file that has no Node entities."""
- filepath_archive = get_archive_file('export_v0.4_no_Nodes.aiida', **core_archive)
+ filepath_archive = get_archive_file('export_0.4_no_Nodes.aiida', **core_archive)
archive_format = ArchiveFormatSqlZip()
new_archive = tmp_path / 'output_file.aiida'
@@ -116,10 +117,10 @@ def test_no_node_migration(tmp_path, core_archive):
@pytest.mark.parametrize('version', ['0.0', '0.1.0', '0.99'])
def test_wrong_versions(core_archive, tmp_path, version):
"""Test correct errors are raised if archive files have wrong version numbers"""
- filepath_archive = get_archive_file('export_v0.4_simple.aiida', **core_archive)
+ filepath_archive = get_archive_file('export_0.4_simple.aiida', **core_archive)
archive_format = ArchiveFormatSqlZip()
new_archive = tmp_path / 'out.aiida'
- with pytest.raises(ArchiveMigrationError, match='Unknown target version'):
+ with pytest.raises(StorageMigrationError, match='Unknown target version'):
archive_format.migrate(filepath_archive, new_archive, version)
assert not new_archive.exists()
diff --git a/tests/tools/archive/migration/test_legacy_to_main.py b/tests/tools/archive/migration/test_legacy_to_main.py
new file mode 100644
index 0000000000..351ff9823f
--- /dev/null
+++ b/tests/tools/archive/migration/test_legacy_to_main.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved. #
+# This file is part of the AiiDA code. #
+# #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file #
+# For further information please visit http://www.aiida.net #
+###########################################################################
+"""Test archive file migration from legacy format (JSON) to main format (SQLite)."""
+import pytest
+
+from aiida.common.exceptions import StorageMigrationError
+from aiida.storage.sqlite_zip.migrator import migrate
+from tests.utils.archives import get_archive_file
+
+
+def test_dangling_links(tmp_path):
+ """Test that links with node UUIDs that are not in the archive are correctly handled."""
+ filepath_archive = get_archive_file('0.10_dangling_link.aiida', 'export/migrate')
+ with pytest.raises(StorageMigrationError, match='Database contains link with unknown input node'):
+ migrate(filepath_archive, tmp_path / 'archive.aiida', 'main_0001')
+
+
+def test_missing_nodes_in_groups(tmp_path, aiida_caplog):
+ """Test that groups with listed node UUIDs that are not in the archive are correctly handled."""
+ filepath_archive = get_archive_file('0.10_unknown_nodes_in_group.aiida', 'export/migrate')
+ migrate(filepath_archive, tmp_path / 'archive.aiida', 'main_0001')
+ assert 'Dropped unknown nodes in groups' in aiida_caplog.text, aiida_caplog.text
+
+
+def test_fields_with_null_values(tmp_path):
+ """Test that fields with null values are correctly handled."""
+ filepath_archive = get_archive_file('0.10_null_fields.aiida', 'export/migrate')
+ migrate(filepath_archive, tmp_path / 'archive.aiida', 'main_0001')
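
As a minimal sketch of the migrator that this new test module exercises (the input and output paths are hypothetical; the call signature and the 'main_0001' target mirror the tests above), a legacy archive can be migrated and failures caught via the generic storage exception:

    from aiida.common.exceptions import StorageMigrationError
    from aiida.storage.sqlite_zip.migrator import migrate

    try:
        migrate('legacy_export.aiida', 'migrated.aiida', 'main_0001')
    except StorageMigrationError as exc:
        print(f'archive could not be migrated: {exc}')
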
diff --git a/tests/tools/archive/migration/test_v04_to_v05.py b/tests/tools/archive/migration/test_v04_to_v05.py
index b22f0e2e48..f394426eff 100644
--- a/tests/tools/archive/migration/test_v04_to_v05.py
+++ b/tests/tools/archive/migration/test_v04_to_v05.py
@@ -8,7 +8,7 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Test archive file migration from export version 0.4 to 0.5"""
-from aiida.tools.archive.implementations.sqlite.migrations.legacy import migrate_v4_to_v5
+from aiida.storage.sqlite_zip.migrations.legacy import migrate_v4_to_v5
def test_migrate_external(migrate_from_func):
diff --git a/tests/tools/archive/migration/test_v05_to_v06.py b/tests/tools/archive/migration/test_v05_to_v06.py
index 4a6a29342c..7223e53ca2 100644
--- a/tests/tools/archive/migration/test_v05_to_v06.py
+++ b/tests/tools/archive/migration/test_v05_to_v06.py
@@ -9,8 +9,8 @@
###########################################################################
"""Test archive file migration from export version 0.5 to 0.6"""
from aiida.storage.psql_dos.migrations.utils.calc_state import STATE_MAPPING
-from aiida.tools.archive.implementations.sqlite.migrations.legacy import migrate_v5_to_v6
-from aiida.tools.archive.implementations.sqlite.migrations.utils import verify_metadata_version
+from aiida.storage.sqlite_zip.migrations.legacy import migrate_v5_to_v6
+from aiida.storage.sqlite_zip.migrations.utils import verify_metadata_version
from tests.utils.archives import get_archive_file, read_json_files
@@ -31,7 +31,7 @@ def test_migrate_v5_to_v6_calc_states(core_archive, migrate_from_func):
module does not include a `CalcJobNode` with a legacy `state` attribute.
"""
# Get metadata.json and data.json as dicts from v0.5 file archive
- archive_path = get_archive_file('export_v0.5_simple.aiida', **core_archive)
+ archive_path = get_archive_file('export_0.5_simple.aiida', **core_archive)
metadata, data = read_json_files(archive_path) # pylint: disable=unbalanced-tuple-unpacking
verify_metadata_version(metadata, version='0.5')
@@ -45,7 +45,7 @@ def test_migrate_v5_to_v6_calc_states(core_archive, migrate_from_func):
calc_jobs[pk] = data['node_attributes'][pk]['state']
# Migrate to v0.6
- metadata, data = migrate_from_func('export_v0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive)
+ metadata, data = migrate_from_func('export_0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive)
verify_metadata_version(metadata, version='0.6')
node_attributes = data['node_attributes']
@@ -73,10 +73,10 @@ def test_migrate_v5_to_v6_datetime(core_archive, migrate_from_func):
Datetime attributes were serialized into strings, by first converting to UTC and then printing with the format
'%Y-%m-%dT%H:%M:%S.%f'. In the database migration, datetimes were serialized *including* timezone information.
Here we test that the archive migration correctly reattaches the timezone information. The archive that we are
- using `export_v0.5_simple.aiida` contains a node with the attribute "scheduler_lastchecktime".
+ using `export_0.5_simple.aiida` contains a node with the attribute "scheduler_lastchecktime".
"""
# Get metadata.json and data.json as dicts from v0.5 file archive
- archive_path = get_archive_file('export_v0.5_simple.aiida', **core_archive)
+ archive_path = get_archive_file('export_0.5_simple.aiida', **core_archive)
metadata, data = read_json_files(archive_path) # pylint: disable=unbalanced-tuple-unpacking
verify_metadata_version(metadata, version='0.5')
@@ -90,7 +90,7 @@ def test_migrate_v5_to_v6_datetime(core_archive, migrate_from_func):
assert '+' not in serialized_original, msg
# Migrate to v0.6
- metadata, data = migrate_from_func('export_v0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive)
+ metadata, data = migrate_from_func('export_0.5_simple.aiida', '0.5', '0.6', migrate_v5_to_v6, core_archive)
verify_metadata_version(metadata, version='0.6')
serialized_migrated = data['node_attributes'][key]['scheduler_lastchecktime']
@@ -99,6 +99,6 @@ def test_migrate_v5_to_v6_datetime(core_archive, migrate_from_func):
else:
raise RuntimeError(
- 'the archive `export_v0.5_simple.aiida` did not contain a node with the attribute '
+ 'the archive `export_0.5_simple.aiida` did not contain a node with the attribute '
'`scheduler_lastchecktime` which is required for this test.'
)
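
The docstring in the hunk above describes how the 0.5 to 0.6 migration reattaches timezone information to naive UTC datetime strings. A minimal illustrative sketch of that reattachment (not the migration code itself; the timestamp mirrors the `scheduler_lastchecktime` value in the deleted `data.json` fixture earlier in this diff):

    from datetime import datetime, timezone

    serialized = '2019-04-09T14:31:56.534041'  # naive string, already expressed in UTC
    migrated = datetime.strptime(serialized, '%Y-%m-%dT%H:%M:%S.%f').replace(tzinfo=timezone.utc)
    assert migrated.isoformat() == '2019-04-09T14:31:56.534041+00:00'
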
diff --git a/tests/tools/archive/migration/test_v06_to_v07.py b/tests/tools/archive/migration/test_v06_to_v07.py
index 96a80aee31..b4f2e502b0 100644
--- a/tests/tools/archive/migration/test_v06_to_v07.py
+++ b/tests/tools/archive/migration/test_v06_to_v07.py
@@ -10,8 +10,8 @@
"""Test archive file migration from export version 0.6 to 0.7"""
import pytest
-from aiida.tools.archive.exceptions import CorruptArchive
-from aiida.tools.archive.implementations.sqlite.migrations.legacy.v06_to_v07 import (
+from aiida.common.exceptions import CorruptStorage
+from aiida.storage.sqlite_zip.migrations.legacy.v06_to_v07 import (
data_migration_legacy_process_attributes,
migrate_v6_to_v7,
)
@@ -49,7 +49,7 @@ def test_migrate_external(migrate_from_func):
def test_migration_0040_corrupt_archive():
- """Check CorruptArchive is raised for different cases during migration 0040"""
+ """Check CorruptStorage is raised for different cases during migration 0040"""
# data has one "valid" entry, in the form of Node .
# At least it has the needed key `node_type`.
# data also has one "invalid" entry, in form of Node .
@@ -73,7 +73,7 @@ def test_migration_0040_corrupt_archive():
}
}
- with pytest.raises(CorruptArchive, match='Your export archive is corrupt! Org. exception:'):
+ with pytest.raises(CorruptStorage, match='Your export archive is corrupt! Org. exception:'):
data_migration_legacy_process_attributes(data)
# data has one "valid" entry, in the form of Node .
@@ -101,7 +101,7 @@ def test_migration_0040_corrupt_archive():
}
}
- with pytest.raises(CorruptArchive, match='Your export archive is corrupt! Please see the log-file'):
+ with pytest.raises(CorruptStorage, match='Your export archive is corrupt! Please see the log-file'):
data_migration_legacy_process_attributes(data)
diff --git a/tests/tools/archive/migration/test_v07_to_v08.py b/tests/tools/archive/migration/test_v07_to_v08.py
index 34c9f0ece7..5c6dd52109 100644
--- a/tests/tools/archive/migration/test_v07_to_v08.py
+++ b/tests/tools/archive/migration/test_v07_to_v08.py
@@ -8,10 +8,7 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Test archive file migration from export version 0.7 to 0.8"""
-from aiida.tools.archive.implementations.sqlite.migrations.legacy.v07_to_v08 import (
- migrate_v7_to_v8,
- migration_default_link_label,
-)
+from aiida.storage.sqlite_zip.migrations.legacy.v07_to_v08 import migrate_v7_to_v8, migration_default_link_label
def test_migrate_external(migrate_from_func):
@@ -28,7 +25,7 @@ def test_migrate_external(migrate_from_func):
def test_migration_0043_default_link_label():
- """Check CorruptArchive is raised for different cases during migration 0040"""
+ """Check link labels are migrated properly."""
# data has one "valid" link, in the form of