diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e5320f62db..eef8ff0a9b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -70,6 +70,7 @@ repos:
         pass_filenames: true
         files: >-
             (?x)^(
+                aiida/backends/control.py|
                 aiida/common/progress_reporter.py|
                 aiida/engine/.*py|
                 aiida/manage/manager.py|
diff --git a/aiida/backends/__init__.py b/aiida/backends/__init__.py
index 81095dac98..e138a9beb7 100644
--- a/aiida/backends/__init__.py
+++ b/aiida/backends/__init__.py
@@ -9,6 +9,21 @@
 ###########################################################################
 """Module for implementations of database backends."""

+# AUTO-GENERATED
+
+# yapf: disable
+# pylint: disable=wildcard-import
+
+from .control import *
+
+__all__ = (
+    'MAINTAIN_LOGGER',
+)
+
+# yapf: enable
+
+# END AUTO-GENERATED
+
 BACKEND_DJANGO = 'django'
 BACKEND_SQLA = 'sqlalchemy'

diff --git a/aiida/backends/control.py b/aiida/backends/control.py
new file mode 100644
index 0000000000..73496e80b1
--- /dev/null
+++ b/aiida/backends/control.py
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved.                     #
+# This file is part of the AiiDA code.                                    #
+#                                                                         #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file        #
+# For further information please visit http://www.aiida.net               #
+###########################################################################
+"""Module for overall repository control commands."""
+# Note: these functions are not methods of `AbstractRepositoryBackend` because they need access to the orm.
+# This is because they have to go through all the nodes to gather the list of keys that AiiDA is keeping
+# track of (since they are decentralized in each node entry).
+# See the get_unreferenced_keyset function
+from typing import TYPE_CHECKING, Optional, Set
+
+from aiida.common.log import AIIDA_LOGGER
+from aiida.manage.manager import get_manager
+
+if TYPE_CHECKING:
+    from aiida.orm.implementation import Backend
+
+__all__ = ('MAINTAIN_LOGGER',)
+
+MAINTAIN_LOGGER = AIIDA_LOGGER.getChild('maintain')
+
+
+def repository_maintain(
+    full: bool = False,
+    dry_run: bool = False,
+    backend: Optional['Backend'] = None,
+    **kwargs,
+) -> None:
+    """Performs maintenance tasks on the repository.
+
+    :param full:
+        flag to perform operations that require to stop using the maintained profile.
+
+    :param dry_run:
+        flag to only print the actions that would be taken without actually executing them.
+
+    :param backend:
+        specific backend in which to apply the maintenance (defaults to current profile).
+
+    :param kwargs:
+        extra keyword arguments, passed on unchanged to the ``maintain`` method of the repository.
+    """
+
+    if backend is None:
+        backend = get_manager().get_backend()
+    repository = backend.get_repository()
+
+    unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend)
+    MAINTAIN_LOGGER.info(f'Deleting {len(unreferenced_objects)} unreferenced objects ...')
+    if not dry_run:
+        repository.delete_objects(list(unreferenced_objects))
+
+    MAINTAIN_LOGGER.info('Starting repository-specific operations ...')
+    # For the repository a ``full`` maintenance is one that is not ``live``.
+    repository.maintain(live=not full, dry_run=dry_run, **kwargs)
+
+
+def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional['Backend'] = None) -> Set[str]:
+    """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA.
+
+    This should be all the soft-deleted files.
+
+    :param check_consistency:
+        toggle for a check that raises if there are references in the database with no actual object in the
+        underlying repository.
+
+    :param aiida_backend:
+        specific backend in which to apply the operation (defaults to current profile).
+
+    :return:
+        a set with all the objects in the underlying repository that are not referenced in the database.
+
+    :raises RuntimeError:
+        if ``check_consistency`` is set and the database references keys that are not in the repository.
+    """
+    from aiida import orm
+    MAINTAIN_LOGGER.info('Obtaining unreferenced object keys ...')
+
+    if aiida_backend is None:
+        aiida_backend = get_manager().get_backend()
+
+    repository = aiida_backend.get_repository()
+
+    keyset_repository = set(repository.list_objects())
+    keyset_database = set(orm.Node.objects(aiida_backend).iter_repo_keys())
+
+    if check_consistency:
+        keyset_missing = keyset_database - keyset_repository
+        if keyset_missing:
+            raise RuntimeError(
+                'There are objects referenced in the database that are not present in the repository. Aborting!'
+            )
+
+    return keyset_repository - keyset_database
+
+
+def get_repository_info(statistics: bool = False, backend: Optional['Backend'] = None) -> dict:
+    """Returns general information on the repository.
+
+    :param statistics:
+        flag passed on to the ``get_info`` method of the repository, to request extra information.
+
+    :param backend:
+        specific backend from which to get the repository (defaults to current profile).
+
+    :return:
+        the dictionary with the information, as returned by the ``get_info`` method of the repository.
+    """
+    if backend is None:
+        backend = get_manager().get_backend()
+    repository = backend.get_repository()
+    return repository.get_info(statistics)
diff --git a/aiida/backends/general/migrations/utils.py b/aiida/backends/general/migrations/utils.py
index 139c522883..94c03075d7 100644
--- a/aiida/backends/general/migrations/utils.py
+++ b/aiida/backends/general/migrations/utils.py
@@ -122,6 +122,12 @@ def list_objects(self) -> Iterable[str]:
     def iter_object_streams(self, keys: List[str]):
         raise NotImplementedError()

+    def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+        raise NotImplementedError
+
+    def get_info(self, statistics: bool = False, **kwargs) -> dict:
+        raise NotImplementedError
+

 def migrate_legacy_repository(shard=None):
     """Migrate the legacy file repository to the new disk object store and return mapping of repository metadata.
diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py
index 58b7c96163..a11c3a128b 100644
--- a/aiida/cmdline/commands/cmd_storage.py
+++ b/aiida/cmdline/commands/cmd_storage.py
@@ -84,8 +84,59 @@ def storage_integrity():
 @click.option('--statistics', is_flag=True, help='Provides more in-detail statistically relevant data.')
 def storage_info(statistics):
     """Summarise the contents of the storage."""
+    from aiida.backends.control import get_repository_info
     from aiida.cmdline.utils.common import get_database_summary
     from aiida.orm import QueryBuilder

-    data = get_database_summary(QueryBuilder, statistics)
+    data = {
+        'database': get_database_summary(QueryBuilder, statistics),
+        'repository': get_repository_info(statistics=statistics),
+    }
+
     echo.echo_dictionary(data, sort_keys=False, fmt='yaml')
+
+
+@verdi_storage.command('maintain')
+@click.option(
+    '--full',
+    is_flag=True,
+    help='Perform all maintenance tasks, including the ones that should not be executed while the profile is in use.'
+)
+@click.option(
+    '--dry-run',
+    is_flag=True,
+    help=
+    'Run the maintenance in dry-run mode which will print actions that would be taken without actually executing them.'
+)
+def storage_maintain(full, dry_run):
+    """Performs maintenance tasks on the repository."""
+    from aiida.backends.control import repository_maintain
+
+    if full:
+        echo.echo_warning(
+            '\nIn order to safely perform the full maintenance operations on the internal storage, no other '
+            'process should be using the AiiDA profile being maintained. '
+            'This includes daemon workers, verdi shells, scripts with the profile loaded, etc. '
+            'Please make sure there is nothing like this currently running and that none is started until '
+            'these procedures conclude. '
+            'For performing maintenance operations that are safe to run while actively using AiiDA, just run '
+            '`verdi storage maintain`, without the `--full` flag.\n'
+        )
+
+    else:
+        echo.echo(
+            '\nThis command will perform all maintenance operations on the internal storage that can be safely '
+            'executed while still running AiiDA. '
+            'However, not all operations that are required to fully optimize disk usage and future performance '
+            'can be done in this way. '
+            'Whenever you find the time or opportunity, please consider running `verdi storage maintain '
+            '--full` for a more complete optimization.\n'
+        )
+
+    # A dry run does not modify anything, so confirmation is only requested for a real run.
+    if not dry_run:
+        if not click.confirm('Are you sure you want to continue in this mode?'):
+            return
+
+    repository_maintain(full=full, dry_run=dry_run)
+    echo.echo_success('Requested maintenance procedures finished.')
diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py
index 7903cc60c6..135b901be8 100644
--- a/aiida/repository/backend/abstract.py
+++ b/aiida/repository/backend/abstract.py
@@ -119,6 +119,29 @@ def list_objects(self) -> Iterable[str]:
         :return: An iterable for all the available object keys.
         """

+    @abc.abstractmethod
+    def get_info(self, statistics: bool = False, **kwargs) -> dict:
+        """Returns relevant information about the content of the repository.
+
+        :param statistics:
+            flag to enable extra information (statistics=False by default, only returns basic information).
+
+        :return: a dictionary with the information.
+        """
+
+    @abc.abstractmethod
+    def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+        """Performs maintenance operations.
+
+        :param dry_run:
+            flag to only print the actions that would be taken without actually executing them.
+
+        :param live:
+            flag to indicate to the backend whether AiiDA is live or not (i.e. if the profile of the
+            backend is currently being used/accessed). The backend is expected then to only allow (and
+            thus set by default) the operations that are safe to perform in this state.
+        """
+
     @contextlib.contextmanager
     def open(self, key: str) -> Iterator[BinaryIO]:
         """Open a file handle to an object stored under the given key.
diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py
index 16b06d9dce..66e3e886c5 100644
--- a/aiida/repository/backend/disk_object_store.py
+++ b/aiida/repository/backend/disk_object_store.py
@@ -12,6 +12,8 @@

 __all__ = ('DiskObjectStoreRepositoryBackend',)

+BYTES_TO_MB = 1 / 1024**2
+

 class DiskObjectStoreRepositoryBackend(AbstractRepositoryBackend):
     """Implementation of the ``AbstractRepositoryBackend`` using the ``disk-object-store`` as the backend."""
@@ -118,3 +120,107 @@ def get_object_hash(self, key: str) -> str:
         if self.container.hash_type != 'sha256':
             return super().get_object_hash(key)
         return key
+
+    def maintain(  # type: ignore # pylint: disable=arguments-differ,too-many-branches
+        self,
+        dry_run: bool = False,
+        live: bool = True,
+        pack_loose: bool = None,
+        do_repack: bool = None,
+        clean_storage: bool = None,
+        do_vacuum: bool = None,
+    ) -> None:
+        """Performs maintenance operations.
+
+        :param dry_run:
+            flag to only log the operations that would be performed, without actually executing them.
+        :param live:
+            if True, will only perform operations that are safe to do while the repository is in use.
+        :param pack_loose:
+            flag for forcing the packing of loose files.
+        :param do_repack:
+            flag for forcing the re-packing of already packed files.
+        :param clean_storage:
+            flag for forcing the cleaning of soft-deleted files from the repository.
+        :param do_vacuum:
+            flag for forcing the vacuuming of the internal database when cleaning the repository.
+        :raises ValueError:
+            if ``live=True`` and any of ``do_repack``, ``clean_storage`` or ``do_vacuum`` is enabled.
+        """
+        from aiida.backends.control import MAINTAIN_LOGGER
+        DOSTORE_LOGGER = MAINTAIN_LOGGER.getChild('disk_object_store')  # pylint: disable=invalid-name
+
+        if live and (do_repack or clean_storage or do_vacuum):
+            overrides = {'do_repack': do_repack, 'clean_storage': clean_storage, 'do_vacuum': do_vacuum}
+            keys = ', '.join(key for key, override in overrides.items() if override)
+            raise ValueError(f'The following overrides were enabled but cannot be if `live=True`: {keys}')
+
+        # Flags left as ``None`` take a default that depends on ``live``: on a repository that is in use only
+        # the packing of loose files is enabled by default.
+        pack_loose = True if pack_loose is None else pack_loose
+
+        if live:
+            do_repack = False
+            clean_storage = False
+            do_vacuum = False
+        else:
+            do_repack = True if do_repack is None else do_repack
+            clean_storage = True if clean_storage is None else clean_storage
+            do_vacuum = True if do_vacuum is None else do_vacuum
+
+        if pack_loose:
+            files_numb = self.container.count_objects()['loose']
+            files_size = self.container.get_total_size()['total_size_loose'] * BYTES_TO_MB
+            DOSTORE_LOGGER.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...')
+            if not dry_run:
+                self.container.pack_all_loose()
+
+        if do_repack:
+            files_numb = self.container.count_objects()['packed']
+            files_size = self.container.get_total_size()['total_size_packfiles_on_disk'] * BYTES_TO_MB
+            DOSTORE_LOGGER.report(
+                f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...'
+            )
+            if not dry_run:
+                self.container.repack()
+
+        if clean_storage:
+            DOSTORE_LOGGER.report(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...')
+            if not dry_run:
+                self.container.clean_storage(vacuum=do_vacuum)
+
+
+    def get_info(  # type: ignore # pylint: disable=arguments-differ
+        self,
+        statistics: bool = False,
+    ) -> dict:
+        """Returns relevant information about the content of the repository.
+
+        :param statistics:
+            flag to also include the number of pack files, the number of objects and their sizes on disk.
+
+        :return:
+            a dictionary with the hash and compression algorithms and, if requested, the statistics.
+        """
+        output_info = {}
+        output_info['SHA-hash algorithm'] = self.container.hash_type
+        output_info['Compression algorithm'] = self.container.compression_algorithm
+
+        if not statistics:
+            return output_info
+
+        files_data = self.container.count_objects()
+        size_data = self.container.get_total_size()
+
+        output_info['Packs'] = files_data['pack_files']  # type: ignore
+
+        output_info['Objects'] = {  # type: ignore
+            'unpacked': files_data['loose'],
+            'packed': files_data['packed'],
+        }
+        output_info['Size (MB)'] = {  # type: ignore
+            'unpacked': size_data['total_size_loose'] * BYTES_TO_MB,
+            'packed': size_data['total_size_packfiles_on_disk'] * BYTES_TO_MB,
+            'other': size_data['total_size_packindexes_on_disk'] * BYTES_TO_MB,
+        }
+        return output_info
diff --git a/aiida/repository/backend/sandbox.py b/aiida/repository/backend/sandbox.py
index 6d17ffc87d..ed4d46407f 100644
--- a/aiida/repository/backend/sandbox.py
+++ b/aiida/repository/backend/sandbox.py
@@ -115,3 +115,9 @@ def delete_objects(self, keys: List[str]) -> None:

     def list_objects(self) -> Iterable[str]:
         return self.sandbox.get_content_list()
+
+    def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+        raise NotImplementedError
+
+    def get_info(self, statistics: bool = False, **kwargs) -> dict:
+        raise NotImplementedError
diff --git a/aiida/tools/archive/implementations/sqlite/backend.py b/aiida/tools/archive/implementations/sqlite/backend.py
index 52fdeb5f36..a089633bf0 100644
--- a/aiida/tools/archive/implementations/sqlite/backend.py
+++ b/aiida/tools/archive/implementations/sqlite/backend.py
@@ -193,6 +193,12 @@ def delete_objects(self, keys: List[str]) -> None:
     def get_object_hash(self, key: str) -> str:
         return key

+    def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+        raise NotImplementedError
+
+    def get_info(self, statistics: bool = False, **kwargs) -> dict:
+        raise NotImplementedError
+

 class ArchiveBackendQueryBuilder(SqlaQueryBuilder):
     """Archive query builder"""
diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst
index 9f2dc32d1a..4e3303a759 100644
--- a/docs/source/reference/command_line.rst
+++ b/docs/source/reference/command_line.rst
@@ -553,6 +553,7 @@ Below is a list with all available subcommands.
     Commands:
       info       Summarise the contents of the storage.
       integrity  Checks for the integrity of the data storage.
+      maintain   Performs maintenance tasks on the repository.
       migrate    Migrate the storage to the latest schema version.


diff --git a/tests/backends/test_control.py b/tests/backends/test_control.py
new file mode 100644
index 0000000000..cc111b8266
--- /dev/null
+++ b/tests/backends/test_control.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+###########################################################################
+# Copyright (c), The AiiDA team. All rights reserved.                     #
+# This file is part of the AiiDA code.                                    #
+#                                                                         #
+# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
+# For further information on the license, see the LICENSE.txt file        #
+# For further information please visit http://www.aiida.net               #
+###########################################################################
+"""Tests for the :mod:`aiida.backends.control` module."""
+import pytest
+
+from aiida.manage.manager import get_manager
+
+
+class MockRepositoryBackend():
+    """Mock of the RepositoryBackend for testing purposes."""
+
+    # pylint: disable=no-self-use
+
+    def get_info(self, *args, **kwargs):
+        """Method to return information."""
+        return 'this is information about the repo'
+
+    def delete_objects(self, *args, **kwargs):
+        """Method to delete objects."""
+
+    def maintain(self, live=True, dry_run=False, **kwargs):
+        """Method to perform maintenance operations."""
+
+        if live:
+            raise ValueError('Signal that live=True')
+
+        elif dry_run:
+            raise ValueError('Signal that dry_run=True')
+
+        elif len(kwargs) > 0:
+            raise ValueError('Signal that kwargs were passed')
+
+        else:
+            raise ValueError('Signal that live and dry_run are False')
+
+
+@pytest.fixture(scope='function')
+def clear_storage_before_test(clear_database_before_test):  # pylint: disable=unused-argument
+    """Clears the storage before a test."""
+    repository = get_manager().get_backend().get_repository()
+    object_keys = list(repository.list_objects())
+    repository.delete_objects(object_keys)
+    repository.maintain(live=False)
+
+
+@pytest.mark.usefixtures('clear_storage_before_test')
+def test_get_unreferenced_keyset():
+    """Test the ``get_unreferenced_keyset`` method."""
+    # NOTE: This test needs to use the database because there is a call inside
+    # `get_unreferenced_keyset` that would require to mock a very complex class
+    # structure:
+    #
+    #   keyset_database = set(orm.Node.objects(aiida_backend).iter_repo_keys())
+    #
+    # Ideally this should eventually be replaced by a more direct call to the
+    # method of a single object that would be easier to mock and would allow
+    # to unit-test this in isolation.
+    from io import BytesIO
+
+    from aiida import orm
+    from aiida.backends.control import get_unreferenced_keyset
+
+    # Baseline: right after clearing the storage there is nothing unreferenced
+    unreferenced_keyset = get_unreferenced_keyset()
+    assert unreferenced_keyset == set()
+
+    # Error catching: put a file, get the keys from the aiida db, manually delete the keys
+    # in the repository
+    datanode = orm.FolderData()
+    datanode.put_object_from_filelike(BytesIO(b'File content'), 'file.txt')
+    datanode.store()
+
+    aiida_backend = get_manager().get_backend()
+    keys = list(orm.Node.objects(aiida_backend).iter_repo_keys())
+
+    repository_backend = aiida_backend.get_repository()
+    repository_backend.delete_objects(keys)
+
+    with pytest.raises(
+        RuntimeError, match='There are objects referenced in the database that are not present in the repository'
+    ) as exc:
+        get_unreferenced_keyset()
+    assert 'aborting' in str(exc.value).lower()
+
+
+#yapf: disable
+@pytest.mark.parametrize(('kwargs', 'logged_texts'), (
+    (
+        {},
+        [' > live: True', ' > dry_run: False']
+    ),
+    (
+        {'full': True, 'dry_run': True},
+        [' > live: False', ' > dry_run: True']
+    ),
+    (
+        {'extra_kwarg': 'molly'},
+        [' > live: True', ' > dry_run: False', ' > extra_kwarg: molly']
+    ),
+))
+# yapf: enable
+@pytest.mark.usefixtures('clear_storage_before_test')
+def test_repository_maintain(caplog, monkeypatch, kwargs, logged_texts):
+    """Test the ``repository_maintain`` method."""
+    import logging
+
+    from aiida.backends.control import repository_maintain
+
+    def mock_maintain(self, live=True, dry_run=False, **kwargs):  # pylint: disable=unused-argument
+        logmsg = 'keywords provided:\n'
+        logmsg += f' > live: {live}\n'
+        logmsg += f' > dry_run: {dry_run}\n'
+        for key, val in kwargs.items():
+            logmsg += f' > {key}: {val}\n'
+        logging.info(logmsg)
+
+    RepoBackendClass = get_manager().get_backend().get_repository().__class__  # pylint: disable=invalid-name
+    monkeypatch.setattr(RepoBackendClass, 'maintain', mock_maintain)
+
+    with caplog.at_level(logging.INFO):
+        repository_maintain(**kwargs)
+
+    message_list = caplog.records[0].msg.splitlines()
+    for text in logged_texts:
+        assert text in message_list
+
+
+def test_repository_info(monkeypatch):
+    """Test the ``get_repository_info`` method."""
+    from aiida.backends.control import get_repository_info
+
+    def mock_get_info(self, statistics=False, **kwargs):  # pylint: disable=unused-argument
+        output = {'value': 42}
+        if statistics:
+            output['extra_value'] = 0
+        return output
+
+    RepoBackendClass = get_manager().get_backend().get_repository().__class__  # pylint: disable=invalid-name
+    monkeypatch.setattr(RepoBackendClass, 'get_info', mock_get_info)
+
+    repository_info_out = get_repository_info()
+    assert 'value' in repository_info_out
+    assert 'extra_value' not in repository_info_out
+    assert repository_info_out['value'] == 42
+
+    repository_info_out = get_repository_info(statistics=True)
+    assert 'value' in repository_info_out
+    assert 'extra_value' in repository_info_out
+    assert repository_info_out['value'] == 42
+    assert repository_info_out['extra_value'] == 0
diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py
index da1371c18a..e7737b3c18 100644
--- a/tests/cmdline/commands/test_storage.py
+++ b/tests/cmdline/commands/test_storage.py
@@ -103,3 +103,39 @@ def mocked_migrate(self): # pylint: disable=no-self-use
     assert result.exc_info[0] is SystemExit
     assert 'Critical:' in result.output
     assert 'passed error message' in result.output
+
+
+def tests_storage_maintain_logging(run_cli_command, monkeypatch, caplog):
+    """Test all the information and cases of the storage maintain command."""
+    import logging
+
+    from aiida.backends import control
+
+    def mock_maintain(**kwargs):
+        logmsg = 'Provided kwargs:\n'
+        for key, val in kwargs.items():
+            logmsg += f' > {key}: {val}\n'
+        logging.info(logmsg)
+
+    monkeypatch.setattr(control, 'repository_maintain', mock_maintain)
+
+    with caplog.at_level(logging.INFO):
+        _ = run_cli_command(cmd_storage.storage_maintain, user_input='Y')
+
+    message_list = caplog.records[0].msg.splitlines()
+    assert ' > full: False' in message_list
+    assert ' > dry_run: False' in message_list
+
+    with caplog.at_level(logging.INFO):
+        _ = run_cli_command(cmd_storage.storage_maintain, options=['--dry-run'])
+
+    message_list = caplog.records[1].msg.splitlines()
+    assert ' > full: False' in message_list
+    assert ' > dry_run: True' in message_list
+
+    with caplog.at_level(logging.INFO):
+        run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y')
+
+    message_list = caplog.records[2].msg.splitlines()
+    assert ' > full: True' in message_list
+    assert ' > dry_run: False' in message_list
diff --git a/tests/repository/backend/test_abstract.py b/tests/repository/backend/test_abstract.py
index 9fb177c0e7..a8c5aee836 100644
--- a/tests/repository/backend/test_abstract.py
+++ b/tests/repository/backend/test_abstract.py
@@ -14,11 +14,11 @@ class RepositoryBackend(AbstractRepositoryBackend):
     """Concrete implementation of ``AbstractRepositoryBackend``."""

     @property
-    def uuid(self) -> Optional[str]:
+    def key_format(self) -> Optional[str]:
         return None

     @property
-    def key_format(self) -> Optional[str]:
+    def uuid(self) -> Optional[str]:
         return None

     def initialise(self, **kwargs) -> None:
@@ -35,7 +35,7 @@ def _put_object_from_filelike(self, handle: BinaryIO) -> str:
         pass

     # pylint useless-super-delegation needs to be disabled here because it refuses to
-    # recognize that this is an abstract method and thus has to be overriden. See the
+    # recognize that this is an abstract method and thus has to be overridden. See the
     # following issue: https://github.com/PyCQA/pylint/issues/1594
     def delete_objects(self, keys: List[str]) -> None:  # pylint: disable=useless-super-delegation
         super().delete_objects(keys)
@@ -49,6 +49,12 @@ def list_objects(self) -> Iterable[str]:
     def iter_object_streams(self, keys: List[str]):
         raise NotImplementedError

+    def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None:
+        raise NotImplementedError
+
+    def get_info(self, statistics: bool = False, **kwargs) -> dict:
+        raise NotImplementedError
+

 @pytest.fixture(scope='function')
 def repository():
@@ -103,7 +109,7 @@ def test_put_object_from_file(repository, generate_directory):


 def test_passes_to_batch(repository, monkeypatch):
-    """Checks that the single object operations call the batch operations"""
+    """Checks that the single object operations call the batch operations."""

     def mock_batch_operation(self, keys):
         raise NotImplementedError('this method was intentionally not implemented')
@@ -122,7 +128,7 @@ def mock_batch_operation(self, keys):


 def test_delete_objects_test(repository, monkeypatch):
-    """Checks that the super of delete_objects will check for existence of the files"""
+    """Checks that the super of delete_objects will check for existence of the files."""

     def has_objects_mock(self, keys):  # pylint: disable=unused-argument
         return [False for key in keys]
diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py
index 113c55f4a9..52e320b01f 100644
--- a/tests/repository/backend/test_disk_object_store.py
+++ b/tests/repository/backend/test_disk_object_store.py
@@ -21,6 +21,30 @@ def repository(tmp_path):
     yield DiskObjectStoreRepositoryBackend(container=container)


+@pytest.fixture(scope='function')
+def populated_repository(repository):
+    """Initializes the storage and database with minimal population."""
+    from io import BytesIO
+    repository.initialise()
+
+    content = BytesIO(b'Packed file number 1')
+    repository.put_object_from_filelike(content)
+
+    content = BytesIO(b'Packed file number 2')
+    repository.put_object_from_filelike(content)
+
+    repository.maintain(live=False)
+
+    content = BytesIO(b'Packed file number 3 (also loose)')
+    repository.put_object_from_filelike(content)
+
+    repository.maintain(live=True)
+
+    content = BytesIO(b'Unpacked file')
+    repository.put_object_from_filelike(content)
+    yield repository
+
+
 def test_str(repository):
     """Test the ``__str__`` method."""
     assert str(repository)
@@ -184,3 +208,96 @@ def test_key_format(repository):
     """Test the ``key_format`` property."""
     repository.initialise()
     assert repository.key_format == repository.container.hash_type
+
+
+def test_get_info(populated_repository):
+    """Test the ``get_info`` method."""
+    repository_info = populated_repository.get_info()
+    assert 'SHA-hash algorithm' in repository_info
+    assert 'Compression algorithm' in repository_info
+    assert repository_info['SHA-hash algorithm'] == 'sha256'
+    assert repository_info['Compression algorithm'] == 'zlib+1'
+
+    repository_info = populated_repository.get_info(statistics=True)
+    assert 'SHA-hash algorithm' in repository_info
+    assert 'Compression algorithm' in repository_info
+    assert repository_info['SHA-hash algorithm'] == 'sha256'
+    assert repository_info['Compression algorithm'] == 'zlib+1'
+
+    assert 'Packs' in repository_info
+    assert repository_info['Packs'] == 1
+
+    assert 'Objects' in repository_info
+    assert 'unpacked' in repository_info['Objects']
+    assert 'packed' in repository_info['Objects']
+    assert repository_info['Objects']['unpacked'] == 2
+    assert repository_info['Objects']['packed'] == 3
+
+    assert 'Size (MB)' in repository_info
+    assert 'unpacked' in repository_info['Size (MB)']
+    assert 'packed' in repository_info['Size (MB)']
+    assert 'other' in repository_info['Size (MB)']
+
+
+#yapf: disable
+@pytest.mark.parametrize(('kwargs', 'output_info'), (
+    (
+        {'live': True},
+        {'unpacked': 2, 'packed': 4}
+    ),
+    (
+        {'live': False},
+        {'unpacked': 0, 'packed': 4}
+    ),
+    (
+        {'live': False, 'do_vacuum': False},
+        {'unpacked': 0, 'packed': 4}
+    ),
+    (
+        {
+            'live': False,
+            'pack_loose': False,
+            'do_repack': False,
+            'clean_storage': False,
+            'do_vacuum': False,
+        },
+        {'unpacked': 2, 'packed': 3}
+    ),
+))
+# yapf: enable
+def test_maintain(populated_repository, kwargs, output_info):
+    """Test the ``maintain`` method."""
+    populated_repository.maintain(**kwargs)
+    file_info = populated_repository.container.count_objects()
+    assert file_info['loose'] == output_info['unpacked']
+    assert file_info['packed'] == output_info['packed']
+
+
+@pytest.mark.parametrize('do_vacuum', [True, False])
+def test_maintain_logging(caplog, populated_repository, do_vacuum):
+    """Test the logging of the ``maintain`` method."""
+    populated_repository.maintain(live=False, do_vacuum=do_vacuum)
+
+    list_of_logmsg = []
+    for record in caplog.records:
+        assert record.levelname == 'REPORT'
+        assert record.name == 'aiida.maintain.disk_object_store'
+        list_of_logmsg.append(record.msg)
+
+    assert 'packing' in list_of_logmsg[0].lower()
+    assert 're-packing' in list_of_logmsg[1].lower()
+    assert 'cleaning' in list_of_logmsg[2].lower()
+
+    if do_vacuum:
+        assert 'vacuum=true' in list_of_logmsg[2].lower()
+    else:
+        assert 'vacuum=false' in list_of_logmsg[2].lower()
+
+
+@pytest.mark.parametrize('kwargs', [{'do_repack': True}, {'clean_storage': True}, {'do_vacuum': True}])
+def test_maintain_live_overload(populated_repository, kwargs):
+    """Test that ``maintain`` rejects the operations that are not safe when ``live=True``."""
+
+    with pytest.raises(ValueError, match='overrides were enabled') as exc:
+        populated_repository.maintain(live=True, **kwargs)
+    assert all(key in str(exc.value) for key in kwargs)