Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADD: New repository CLI #4965

Merged
merged 19 commits into from
Dec 9, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ repos:
pass_filenames: true
files: >-
(?x)^(
aiida/backends/control.py|
aiida/common/progress_reporter.py|
aiida/engine/.*py|
aiida/manage/manager.py|
Expand Down
15 changes: 15 additions & 0 deletions aiida/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@
###########################################################################
"""Module for implementations of database backends."""

# AUTO-GENERATED

# yapf: disable
# pylint: disable=wildcard-import

from .control import *

__all__ = (
'MAINTAIN_LOGGER',
)

# yapf: enable

# END AUTO-GENERATED

# String identifiers for the Django and SQLAlchemy database backend implementations.
BACKEND_DJANGO = 'django'
BACKEND_SQLA = 'sqlalchemy'

Expand Down
80 changes: 80 additions & 0 deletions aiida/backends/control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
ramirezfranciscof marked this conversation as resolved.
Show resolved Hide resolved
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Module for overall repository control commands."""
# Note: these functions are not methods of `AbstractRepositoryBackend` because they need access to the orm.
# This is because they have to go through all the nodes to gather the list of keys that AiiDA is keeping
# track of (since they are decentralized in each node entry).
# See the get_unreferenced_keyset function
from typing import Optional, Set

from aiida.common.log import AIIDA_LOGGER
from aiida.manage.manager import get_manager
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from aiida.manage.manager import get_manager
from typing import Optional, Set
from aiida.manage.manager import get_manager

from aiida.orm.implementation import Backend

__all__ = ('MAINTAIN_LOGGER',)
sphuber marked this conversation as resolved.
Show resolved Hide resolved

# Logger for the maintenance operations, created as the `maintain` child of the main AiiDA logger.
MAINTAIN_LOGGER = AIIDA_LOGGER.getChild('maintain')
sphuber marked this conversation as resolved.
Show resolved Hide resolved


def repository_maintain(
    full: bool = False,
    dry_run: bool = False,
    backend: Optional[Backend] = None,
    **kwargs,
) -> None:
    """Performs maintenance tasks on the repository.

    The objects that exist in the repository but are not referenced by any node are deleted first
    (or only counted, in a dry run); afterwards the ``maintain`` method of the repository is called.

    :param full:
        flag to request the complete maintenance; it is forwarded to the repository as ``live=not full``.
    :param dry_run:
        flag to only log what would be done: no objects are deleted here and the flag is forwarded
        to the repository.
    :param backend:
        the backend whose repository is maintained; when omitted, ``get_manager().get_backend()`` is used.
    :param kwargs:
        extra keyword arguments passed on unchanged to the ``maintain`` method of the repository.
    """
    if backend is None:
        backend = get_manager().get_backend()
    repository = backend.get_repository()

    unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend)
    if dry_run:
        MAINTAIN_LOGGER.info(f'Would delete {len(unreferenced_objects)} unreferenced objects ...')
    else:
        MAINTAIN_LOGGER.info(f'Deleting {len(unreferenced_objects)} unreferenced objects ...')
        repository.delete_objects(list(unreferenced_objects))

    # The repository decides by itself which operations are allowed for the given `live` state.
    MAINTAIN_LOGGER.info('Starting repository-specific operations ...')
    repository.maintain(live=not full, dry_run=dry_run, **kwargs)


def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional[Backend] = None) -> Set[str]:
    """Return the keys of the objects stored in the repository that are not tracked by AiiDA.

    This should be all the soft-deleted files.

    :param check_consistency:
        when True, first verify that every key referenced in the database exists in the repository.
    :param aiida_backend:
        the backend to inspect; when omitted, ``get_manager().get_backend()`` is used.
    :return: the set of keys present in the repository but not referenced in the database.
    :raises RuntimeError:
        if ``check_consistency`` is True and the database references keys missing from the repository.
    """
    from aiida import orm

    MAINTAIN_LOGGER.info('Obtaining unreferenced object keys ...')

    backend = get_manager().get_backend() if aiida_backend is None else aiida_backend
    repository = backend.get_repository()

    stored_keys = set(repository.list_objects())
    referenced_keys = set(orm.Node.objects(backend).iter_repo_keys())

    # A non-empty difference means the database points to objects the repository does not hold.
    if check_consistency and referenced_keys - stored_keys:
        raise RuntimeError(
            'There are objects referenced in the database that are not present in the repository. Aborting!'
        )

    return stored_keys - referenced_keys


def get_repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict:
    """Return general information on the repository.

    :param statistics: flag passed on to the ``get_info`` method of the repository.
    :param backend: the backend to query; when omitted, ``get_manager().get_backend()`` is used.
    :return: the dictionary produced by the ``get_info`` method of the repository.
    """
    selected_backend = get_manager().get_backend() if backend is None else backend
    return selected_backend.get_repository().get_info(statistics)
6 changes: 6 additions & 0 deletions aiida/backends/general/migrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ def list_objects(self) -> Iterable[str]:
def iter_object_streams(self, keys: List[str]):
raise NotImplementedError()

def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
    """Maintenance is not implemented for this class.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

def get_info(self, statistics: bool = False, **kwargs) -> dict:
    """Retrieving repository information is not implemented for this class.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()


def migrate_legacy_repository(shard=None):
"""Migrate the legacy file repository to the new disk object store and return mapping of repository metadata.
Expand Down
53 changes: 52 additions & 1 deletion aiida/cmdline/commands/cmd_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,59 @@ def storage_integrity():
@click.option('--statistics', is_flag=True, help='Provides more in-detail statistically relevant data.')
def storage_info(statistics):
    """Summarise the contents of the storage."""
    from aiida.backends.control import get_repository_info
    from aiida.cmdline.utils.common import get_database_summary
    from aiida.orm import QueryBuilder

    # Each summary is computed exactly once. The keys are inserted in display order, which is why
    # sorting is disabled when echoing.
    data = {
        'database': get_database_summary(QueryBuilder, statistics),
        'repository': get_repository_info(statistics=statistics),
    }

    echo.echo_dictionary(data, sort_keys=False, fmt='yaml')


@verdi_storage.command('maintain')
@click.option(
    '--full',
    is_flag=True,
    help='Perform all maintenance tasks, including the ones that should not be executed while the profile is in use.'
)
@click.option(
    '--dry-run',
    is_flag=True,
    help='Returns information that allows to estimate the impact of the maintenance operations.'
)
def storage_maintain(full, dry_run):
    """Performs maintenance tasks on the repository."""
    from aiida.backends.control import repository_maintain

    # NOTE(review): this relies on `echo.echo_critical` aborting the command - confirm.
    if dry_run and full:
        echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.')

    if full:
        echo.echo_warning(
            '\nIn order to safely perform the full maintenance operations on the internal storage, no other '
            'process should be using the AiiDA profile being maintained. '
            'This includes daemon workers, verdi shells, scripts with the profile loaded, etc. '
            'Please make sure there is nothing like this currently running and that none is started until '
            'these procedures conclude. '
            'For performing maintenance operations that are safe to run while actively using AiiDA, just run '
            '`verdi storage maintain`, without the `--full` flag.\n'
        )

    elif not dry_run:
        echo.echo(
            '\nThis command will perform all maintenance operations on the internal storage that can be safely '
            'executed while still running AiiDA. '
            'However, not all operations that are required to fully optimize disk usage and future performance '
            'can be done in this way. '
            'Whenever you find the time or opportunity, please consider running `verdi storage maintain '
            '--full` for a more complete optimization.\n'
        )

    # A dry run changes nothing, so only real runs ask for confirmation.
    if not dry_run:
        if not click.confirm('Are you sure you want to continue in this mode?'):
            return

    repository_maintain(full=full, dry_run=dry_run)
    echo.echo('\nRequested maintenance procedures finished.')
ramirezfranciscof marked this conversation as resolved.
Show resolved Hide resolved
25 changes: 25 additions & 0 deletions aiida/repository/backend/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,31 @@ def list_objects(self) -> Iterable[str]:
:return: An iterable for all the available object keys.
"""

@abc.abstractmethod
def get_info(self, statistics: bool = False, **kwargs) -> dict:
    """Returns relevant information about the content of the repository.

    :param statistics:
        flag to enable extra information (statistics=False by default, only returns basic information).

    :param kwargs:
        additional keyword arguments; their meaning is left to the concrete backend.

    :return: a dictionary with the information.
    """

@abc.abstractmethod
def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
    """Performs maintenance operations.

    :param dry_run:
        flag to only report the operations that would be performed, without actually executing them.

    :param live:
        flag to indicate to the backend whether AiiDA is live or not (i.e. if the profile of the
        backend is currently being used/accessed). The backend is expected then to only allow (and
        thus set by default) the operations that are safe to perform in this state.

    :param kwargs:
        additional keyword arguments; their meaning is left to the concrete backend.

    :return: None
    """

@contextlib.contextmanager
def open(self, key: str) -> Iterator[BinaryIO]:
"""Open a file handle to an object stored under the given key.
Expand Down
115 changes: 115 additions & 0 deletions aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

__all__ = ('DiskObjectStoreRepositoryBackend',)

# Conversion factor from bytes to megabytes (1 MB taken as 1024 * 1024 bytes), computed exactly
# instead of being written out as a truncated decimal literal.
BYTES_TO_MB = 1 / 1024**2
ramirezfranciscof marked this conversation as resolved.
Show resolved Hide resolved


class DiskObjectStoreRepositoryBackend(AbstractRepositoryBackend):
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-object-store`` as the backend."""
Expand Down Expand Up @@ -118,3 +120,116 @@ def get_object_hash(self, key: str) -> str:
if self.container.hash_type != 'sha256':
return super().get_object_hash(key)
return key

def maintain(  # type: ignore # pylint: disable=arguments-differ,too-many-branches
    self,
    dry_run: bool = False,
    live: bool = True,
    override_pack_loose: bool = None,
    override_do_repack: bool = None,
    override_clean_storage: bool = None,
    override_do_vacuum: bool = None,
) -> None:
    """Performs maintenance operations.

    By default loose files are always packed, while re-packing, cleaning the storage and vacuuming
    are only enabled when ``live`` is False. Each default can be replaced through the matching
    ``override_*`` flag (``None`` keeps the default).

    :param dry_run:
        flag to only report the operations that would be run: no packing, re-packing or cleaning
        method of the container is called.
    :param live:
        flag to indicate that the profile is in use, in which case re-packing, cleaning and
        vacuuming cannot be requested.
    :param override_pack_loose:
        override flag for forcing the packing of loose files.
    :param override_do_repack:
        override flag for forcing the re-packing of already packed files.
    :param override_clean_storage:
        override flag for forcing the cleaning of soft-deleted files from the repository.
    :param override_do_vacuum:
        override flag for forcing the vacuuming of the internal database when cleaning the repository.
    :raises ValueError:
        if ``live`` is True and any of ``override_do_repack``, ``override_clean_storage`` or
        ``override_do_vacuum`` is enabled.
    """
    # Reject unsafe requests before doing anything else, listing every offending keyword.
    if live:
        unsafe_overrides = (
            ('override_do_repack', override_do_repack),
            ('override_clean_storage', override_clean_storage),
            ('override_do_vacuum', override_do_vacuum),
        )
        offending = [f' > {name} = {value}\n' for name, value in unsafe_overrides if value]
        if offending:
            raise ValueError(
                'a specifically requested keyword cannot be applied while the profile is in use:\n' +
                ''.join(offending)
            )

    from aiida.backends.control import MAINTAIN_LOGGER
    DOSTORE_LOGGER = MAINTAIN_LOGGER.getChild('disk_object_store')  # pylint: disable=invalid-name

    # Defaults depend on whether the profile is in use; an explicit override always wins.
    pack_loose = True if override_pack_loose is None else override_pack_loose
    do_repack = (not live) if override_do_repack is None else override_do_repack
    clean_storage = (not live) if override_clean_storage is None else override_clean_storage
    do_vacuum = (not live) if override_do_vacuum is None else override_do_vacuum

    if pack_loose:
        files_numb = self.container.count_objects()['loose']
        files_size = self.container.get_total_size()['total_size_loose'] * BYTES_TO_MB
        if dry_run:
            DOSTORE_LOGGER.report(f'Would pack all loose files ({files_numb} files occupying {files_size} MB) ...')
        else:
            DOSTORE_LOGGER.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...')
            self.container.pack_all_loose()

    if do_repack:
        # The current size does not tell how much space will be saved; it is reported only to give
        # an idea of how long the re-packing might take.
        files_numb = self.container.count_objects()['packed']
        files_size = self.container.get_total_size()['total_size_packfiles_on_disk'] * BYTES_TO_MB
        if dry_run:
            DOSTORE_LOGGER.report(
                f'Would re-pack all pack files ({files_numb} files in packs, occupying {files_size} MB) ...'
            )
        else:
            DOSTORE_LOGGER.report(
                f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...'
            )
            self.container.repack()

    if clean_storage:
        if dry_run:
            DOSTORE_LOGGER.report(f'Would clean the repository database (with `vacuum={do_vacuum}`) ...')
        else:
            DOSTORE_LOGGER.report(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...')
            self.container.clean_storage(vacuum=do_vacuum)


def get_info(self, statistics=False) -> dict:  # type: ignore # pylint: disable=arguments-differ
    """Return information about the container backing this repository.

    :param statistics:
        when True, also include the number of pack files, the object counts and the sizes in MB.
    :return:
        a dictionary that always holds the hash and compression algorithms of the container and,
        if ``statistics`` is True, the ``Packs``, ``Objects`` and ``Size (MB)`` entries as well.
    """
    info = {
        'SHA-hash algorithm': self.container.hash_type,
        'Compression algorithm': self.container.compression_algorithm,
    }

    if statistics:
        object_counts = self.container.count_objects()
        sizes = self.container.get_total_size()

        info['Packs'] = object_counts['pack_files']  # type: ignore
        info['Objects'] = {  # type: ignore
            'unpacked': object_counts['loose'],
            'packed': object_counts['packed'],
        }
        info['Size (MB)'] = {  # type: ignore
            'unpacked': sizes['total_size_loose'] * BYTES_TO_MB,
            'packed': sizes['total_size_packfiles_on_disk'] * BYTES_TO_MB,
            'other': sizes['total_size_packindexes_on_disk'] * BYTES_TO_MB,
        }

    return info
6 changes: 6 additions & 0 deletions aiida/repository/backend/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,9 @@ def delete_objects(self, keys: List[str]) -> None:

def list_objects(self) -> Iterable[str]:
return self.sandbox.get_content_list()

def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
    """Maintenance is not implemented for this backend.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

def get_info(self, statistics: bool = False, **kwargs) -> dict:
    """Retrieving repository information is not implemented for this backend.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()
6 changes: 6 additions & 0 deletions aiida/tools/archive/implementations/sqlite/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ def delete_objects(self, keys: List[str]) -> None:
def get_object_hash(self, key: str) -> str:
return key

def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
    """Maintenance is not implemented for this backend.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

def get_info(self, statistics: bool = False, **kwargs) -> dict:
    """Retrieving repository information is not implemented for this backend.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()


class ArchiveBackendQueryBuilder(SqlaQueryBuilder):
"""Archive query builder"""
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/command_line.rst
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ Below is a list with all available subcommands.
Commands:
info Summarise the contents of the storage.
integrity Checks for the integrity of the data storage.
maintain Performs maintenance tasks on the repository.
migrate Migrate the storage to the latest schema version.


Expand Down
Loading