Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

♻️ REFACTOR: Move archive backend to aiida/storage #5375

Merged
merged 26 commits into from
Mar 6, 2022
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7347623
♻️ REFACTOR: Move archive backend to `aiida/storage`
chrisjsewell Feb 22, 2022
064b42c
move migrations
chrisjsewell Feb 23, 2022
4e72acb
Move utilities
chrisjsewell Feb 23, 2022
d04ce8e
Update backend.py
chrisjsewell Feb 23, 2022
6f161a6
consolidate use of exceptions
chrisjsewell Feb 23, 2022
6b7dc36
Make ReadOnlyError subclass of AiidaException
chrisjsewell Feb 24, 2022
894807b
Move get_model_from_entity
chrisjsewell Feb 24, 2022
72a6cb0
Allow for profile creation via `SqliteZipBackend.create_profile`
chrisjsewell Feb 24, 2022
6bed02b
Fix rebase errors
chrisjsewell Feb 24, 2022
999aec4
Introduce aiida.storage.log.MIGRATE_LOGGER
chrisjsewell Feb 24, 2022
dbbc5a0
fix test archive file naming
chrisjsewell Feb 24, 2022
37b004e
Update arithmetic.add_old.aiida
chrisjsewell Feb 24, 2022
fed42e6
Implement alembic migrations
chrisjsewell Feb 25, 2022
98dfa09
Deprecate `verdi archive inspect`
chrisjsewell Feb 27, 2022
8aa1f88
Improve `verdi archive` docstrings
chrisjsewell Feb 27, 2022
89fb338
Add tests to ensure psql_dos and sqlite_zip are in-sync
chrisjsewell Feb 27, 2022
3ed4dc3
test commenting out new tests
chrisjsewell Feb 27, 2022
7035711
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 27, 2022
7111d90
re-instate test_schema
chrisjsewell Feb 28, 2022
8078d99
improve typing
chrisjsewell Mar 4, 2022
f0991b3
Handle dangling nodes in links/groups for legacy_to_main
chrisjsewell Mar 4, 2022
ad91612
Handle null fields in legacy archives
chrisjsewell Mar 4, 2022
4e65372
fix tests
chrisjsewell Mar 4, 2022
1b39db9
Update migrator.py
chrisjsewell Mar 4, 2022
ae28d23
allow reading as folder
chrisjsewell Mar 6, 2022
b624c19
Add `aiida/storage/sqlite_zip` to mypy type checking
chrisjsewell Mar 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 62 additions & 60 deletions aiida/cmdline/commands/cmd_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,19 @@
"""`verdi archive` command."""
from enum import Enum
import logging
from pathlib import Path
import traceback
from typing import List, Tuple
import urllib.request

import click
from click_spinner import spinner
import tabulate

from aiida.cmdline.commands.cmd_verdi import verdi
from aiida.cmdline.params import arguments, options
from aiida.cmdline.params.types import GroupParamType, PathOrUrl
from aiida.cmdline.utils import decorators, echo
from aiida.common.exceptions import CorruptStorage, IncompatibleStorageSchema, UnreachableStorage
from aiida.common.links import GraphTraversalRules
from aiida.common.log import AIIDA_LOGGER

Expand All @@ -36,66 +37,68 @@ def verdi_archive():
"""Create, inspect and import AiiDA archives."""


@verdi_archive.command('inspect')
@verdi_archive.command('version')
@click.argument('path', nargs=1, type=click.Path(exists=True, readable=True))
def archive_version(path):
    """Print the current version of an archive's schema."""
    # note: this mirrors `cmd_storage:storage_version`
    # it is currently hardcoded to the `SqliteZipBackend`, but could be generalized in the future
    from aiida.storage.sqlite_zip.backend import SqliteZipBackend

    # resolve the latest schema version first, then probe the archive itself
    head_version = SqliteZipBackend.version_head()
    archive_profile = SqliteZipBackend.create_profile(path)
    try:
        profile_version = SqliteZipBackend.version_profile(archive_profile)
    except (UnreachableStorage, CorruptStorage) as exc:
        echo.echo_critical(f'archive file version unreadable: {exc}')
    echo.echo(f'Latest archive schema version: {head_version!r}')
    echo.echo(f'Archive schema version of {Path(path).name!r}: {profile_version!r}')


@verdi_archive.command('info')
@click.argument('path', nargs=1, type=click.Path(exists=True, readable=True))
@click.option('--statistics', is_flag=True, help='Provides more in-detail statistically relevant data.')
def archive_info(path, statistics):
    """Summarise the contents of an archive."""
    # note: this mirrors `cmd_storage:storage_info`
    # it is currently hardcoded to the `SqliteZipBackend`, but could be generalized in the future
    from aiida.storage.sqlite_zip.backend import SqliteZipBackend

    try:
        storage = SqliteZipBackend(SqliteZipBackend.create_profile(path))
    except (UnreachableStorage, CorruptStorage) as exception:
        echo.echo_critical(f'archive file unreadable: {exception}')
    except IncompatibleStorageSchema as exception:
        echo.echo_critical(f'archive version incompatible: {exception}')

    # always release the storage handle, even if gathering the info fails
    with spinner():
        try:
            data = storage.get_info(statistics=statistics)
        finally:
            storage.close()

    echo.echo_dictionary(data, sort_keys=False, fmt='yaml')


@verdi_archive.command('inspect', hidden=True)
@click.argument('archive', nargs=1, type=click.Path(exists=True, readable=True))
@click.option('-v', '--version', is_flag=True, help='Print the archive format version and exit.')
@click.option('-m', '--meta-data', is_flag=True, help='Print the meta data contents and exit.')
@click.option('-d', '--database', is_flag=True, help='Include information on entities in the database.')
def inspect(archive, version, meta_data, database):
@decorators.deprecated_command(
'This command has been deprecated and will be removed soon. '
'Please call `verdi archive version` or `verdi archive info` instead.\n'
)
@click.pass_context
def inspect(ctx, archive, version, meta_data, database): # pylint: disable=unused-argument
"""Inspect contents of an archive without importing it.

By default a summary of the archive contents will be printed.
The various options can be used to change exactly what information is displayed.
.. deprecated:: v2.0.0, use `verdi archive version` or `verdi archive info` instead.
"""
from aiida.tools.archive.abstract import get_format
from aiida.tools.archive.exceptions import UnreadableArchiveError

archive_format = get_format()
latest_version = archive_format.latest_version
try:
current_version = archive_format.read_version(archive)
except UnreadableArchiveError as exc:
echo.echo_critical(f'archive file of unknown format: {exc}')

if version:
echo.echo(current_version)
return

if current_version != latest_version:
echo.echo_critical(
f"Archive version is not the latest: '{current_version}' != '{latest_version}'. "
'Use `verdi migrate` to upgrade to the latest version'
)

with archive_format.open(archive, 'r') as archive_reader:
metadata = archive_reader.get_metadata()

if meta_data:
echo.echo_dictionary(metadata, sort_keys=False)
return

statistics = {
name: metadata[key] for key, name in [
['export_version', 'Version archive'],
['aiida_version', 'Version aiida'],
['compression', 'Compression'],
['ctime', 'Created'],
['mtime', 'Modified'],
] if key in metadata
}
if 'conversion_info' in metadata:
statistics['Conversion info'] = '\n'.join(metadata['conversion_info'])

echo.echo(tabulate.tabulate(statistics.items()))

if database:
echo.echo('')
echo.echo('Database statistics')
echo.echo('-------------------')
with spinner():
with archive_format.open(archive, 'r') as archive_reader:
data = archive_reader.get_backend().get_info(statistics=True)
echo.echo_dictionary(data, sort_keys=False, fmt='yaml')
ctx.invoke(archive_version, path=archive)
elif database:
ctx.invoke(archive_info, path=archive, statistics=True)
else:
ctx.invoke(archive_info, path=archive, statistics=False)


@verdi_archive.command('create')
Expand Down Expand Up @@ -136,7 +139,7 @@ def create(
create_backward, return_backward, call_calc_backward, call_work_backward, include_comments, include_logs,
include_authinfos, compress, batch_size, test_run
):
"""Write subsets of the provenance graph to a single file.
"""Create an archive from all or part of a profiles's data.

Besides Nodes of the provenance graph, you can archive Groups, Codes, Computers, Comments and Logs.

Expand Down Expand Up @@ -214,7 +217,7 @@ def create(
help='Archive format version to migrate to (defaults to latest version).',
)
def migrate(input_file, output_file, force, in_place, version):
"""Migrate an export archive to a more recent format version."""
"""Migrate an archive to a more recent schema version."""
from aiida.common.progress_reporter import set_progress_bar_tqdm, set_progress_reporter
from aiida.tools.archive.abstract import get_format

Expand Down Expand Up @@ -248,7 +251,7 @@ def migrate(input_file, output_file, force, in_place, version):
f'{error.__class__.__name__}:{error}'
)

echo.echo_success(f'migrated the archive to version {version}')
echo.echo_success(f'migrated the archive to version {version!r}')


class ExtrasImportCode(Enum):
Expand Down Expand Up @@ -333,7 +336,7 @@ def import_archive(
ctx, archives, webpages, extras_mode_existing, extras_mode_new, comment_mode, include_authinfos, migration,
batch_size, import_group, group, test_run
):
"""Import data from an AiiDA archive file.
"""Import archived data to a profile.

The archive can be specified by its relative or absolute file path, or its HTTP URL.
"""
Expand Down Expand Up @@ -424,12 +427,11 @@ def _import_archive_and_migrate(archive: str, web_based: bool, import_kwargs: di
:param archive: the path or URL to the archive
:param web_based: If the archive needs to be downloaded first
:param import_kwargs: keyword arguments to pass to the import function
:param try_migration: whether to try a migration if the import raises IncompatibleArchiveVersionError
:param try_migration: whether to try a migration if the import raises `IncompatibleStorageSchema`

"""
from aiida.common.folders import SandboxFolder
from aiida.tools.archive.abstract import get_format
from aiida.tools.archive.exceptions import IncompatibleArchiveVersionError
from aiida.tools.archive.imports import import_archive as _import_archive

archive_format = get_format()
Expand All @@ -452,7 +454,7 @@ def _import_archive_and_migrate(archive: str, web_based: bool, import_kwargs: di
echo.echo_report(f'starting import: {archive}')
try:
_import_archive(archive_path, archive_format=archive_format, **import_kwargs)
except IncompatibleArchiveVersionError as exception:
except IncompatibleStorageSchema as exception:
if try_migration:

echo.echo_report(f'incompatible version detected for {archive}, trying migration')
Expand Down
6 changes: 3 additions & 3 deletions aiida/manage/configuration/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ def storage_cls(self) -> Type['StorageBackend']:
if self.storage_backend == 'psql_dos':
from aiida.storage.psql_dos.backend import PsqlDosBackend
return PsqlDosBackend
if self.storage_backend == 'archive.sqlite':
from aiida.tools.archive.implementations.sqlite.backend import ArchiveReadOnlyBackend
return ArchiveReadOnlyBackend
if self.storage_backend == 'sqlite_zip':
from aiida.storage.sqlite_zip.backend import SqliteZipBackend
return SqliteZipBackend
raise ValueError(f'unknown storage backend type: {self.storage_backend}')

@property
Expand Down
1 change: 1 addition & 0 deletions aiida/storage/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
from aiida.common.log import AIIDA_LOGGER

STORAGE_LOGGER = AIIDA_LOGGER.getChild('storage')
MIGRATE_LOGGER = STORAGE_LOGGER.getChild('migrate')
2 changes: 1 addition & 1 deletion aiida/storage/psql_dos/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Module with implementation of the storage backend using SqlAlchemy and the disk-objectstore."""
"""Module with implementation of the storage backend using PostGreSQL and the disk-objectstore."""

# AUTO-GENERATED

Expand Down
14 changes: 2 additions & 12 deletions aiida/storage/psql_dos/migrations/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,8 @@ def run_migrations_online():

The connection should have been passed to the config, which we use to configue the migration context.
"""
from aiida.storage.psql_dos.models.base import get_orm_metadata

# pylint: disable=unused-import
from aiida.common.exceptions import DbContentError
from aiida.storage.psql_dos.models.authinfo import DbAuthInfo
from aiida.storage.psql_dos.models.base import Base
from aiida.storage.psql_dos.models.comment import DbComment
from aiida.storage.psql_dos.models.computer import DbComputer
from aiida.storage.psql_dos.models.group import DbGroup
from aiida.storage.psql_dos.models.log import DbLog
from aiida.storage.psql_dos.models.node import DbLink, DbNode
from aiida.storage.psql_dos.models.settings import DbSetting
from aiida.storage.psql_dos.models.user import DbUser
config = context.config # pylint: disable=no-member

connection = config.attributes.get('connection', None)
Expand All @@ -43,7 +33,7 @@ def run_migrations_online():

context.configure( # pylint: disable=no-member
connection=connection,
target_metadata=Base.metadata,
target_metadata=get_orm_metadata(),
transaction_per_migration=True,
aiida_profile=aiida_profile,
on_version_apply=on_version_apply
Expand Down
18 changes: 8 additions & 10 deletions aiida/storage/psql_dos/migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

from aiida.common import exceptions
from aiida.manage.configuration.profile import Profile
from aiida.storage.log import MIGRATE_LOGGER
from aiida.storage.psql_dos.models.settings import DbSetting
from aiida.storage.psql_dos.utils import create_sqlalchemy_engine

Expand Down Expand Up @@ -197,8 +198,6 @@ def migrate(self) -> None:

:raises: :class:`~aiida.common.exceptions.UnreachableStorage` if the storage cannot be accessed
"""
from aiida.cmdline.utils import echo

# the database can be in one of a few states:
# 1. Completely empty -> we can simply initialise it with the current ORM schema
# 2. Legacy django database -> we transfer the version to alembic, migrate to the head of the django branch,
Expand All @@ -211,7 +210,7 @@ def migrate(self) -> None:
if not inspect(connection).has_table(self.alembic_version_tbl_name):
if not inspect(connection).has_table(self.django_version_table.name):
# the database is assumed to be empty, so we need to initialise it
echo.echo_report('initialising empty storage schema')
MIGRATE_LOGGER.report('initialising empty storage schema')
self.initialise()
return
# the database is a legacy django one,
Expand All @@ -238,10 +237,10 @@ def migrate(self) -> None:
if 'django' in branches or 'sqlalchemy' in branches:
# migrate up to the top of the respective legacy branches
if 'django' in branches:
echo.echo_report('Migrating to the head of the legacy django branch')
MIGRATE_LOGGER.report('Migrating to the head of the legacy django branch')
self.migrate_up('django@head')
elif 'sqlalchemy' in branches:
echo.echo_report('Migrating to the head of the legacy sqlalchemy branch')
MIGRATE_LOGGER.report('Migrating to the head of the legacy sqlalchemy branch')
self.migrate_up('sqlalchemy@head')
# now re-stamp with the comparable revision on the main branch
with self._connection_context() as connection:
Expand All @@ -251,7 +250,7 @@ def migrate(self) -> None:
connection.commit()

# finally migrate to the main head revision
echo.echo_report('Migrating to the head of the main branch')
MIGRATE_LOGGER.report('Migrating to the head of the main branch')
self.migrate_up('main@head')

def migrate_up(self, version: str) -> None:
Expand Down Expand Up @@ -284,7 +283,7 @@ def _alembic_script(cls):
return ScriptDirectory.from_config(cls._alembic_config())

@contextlib.contextmanager
def _alembic_connect(self, _connection: Optional[Connection] = None):
def _alembic_connect(self, _connection: Optional[Connection] = None) -> Iterator[Config]:
"""Context manager to return an instance of an Alembic configuration.

The profiles's database connection is added in the `attributes` property, through which it can then also be
Expand All @@ -297,16 +296,15 @@ def _alembic_connect(self, _connection: Optional[Connection] = None):

def _callback(step: MigrationInfo, **kwargs): # pylint: disable=unused-argument
"""Callback to be called after a migration step is executed."""
from aiida.cmdline.utils import echo
from_rev = step.down_revision_ids[0] if step.down_revision_ids else '<base>'
echo.echo_report(f'- {from_rev} -> {step.up_revision_id}')
MIGRATE_LOGGER.report(f'- {from_rev} -> {step.up_revision_id}')

config.attributes['on_version_apply'] = _callback # pylint: disable=unsupported-assignment-operation

yield config

@contextlib.contextmanager
def _migration_context(self, _connection: Optional[Connection] = None) -> MigrationContext:
def _migration_context(self, _connection: Optional[Connection] = None) -> Iterator[MigrationContext]:
"""Context manager to return an instance of an Alembic migration context.

This migration context will have been configured with the current database connection, which allows this context
Expand Down
33 changes: 33 additions & 0 deletions aiida/storage/sqlite_zip/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Module with implementation of the storage backend,
using an SQLite database and repository files within a zipfile.

The content of the zip file is::

|- storage.zip
|- metadata.json
|- db.sqlite3
|- repo/
|- hashkey1
|- hashkey2
...

For quick access, the metadata (such as the version) is stored in a `metadata.json` file,
at the "top" of the zip file, with the sqlite database, just below it, then the repository files.
Repository files are named by their SHA256 content hash.

This storage method is primarily intended for the AiiDA archive,
as a read-only storage method.
This is because sqlite and zip are not suitable for concurrent write access.

The archive format originally used a JSON file to store the database,
and these revisions are handled by the `version_profile` and `migrate` backend methods.
"""
Loading