From 83155d22a13810fd32b3e8169763df77d6f9a146 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 6 Oct 2021 13:49:55 +0200 Subject: [PATCH] `Data`: allow `source` to be passed as a keyword argument (#5163) The `Data` class defines the `source` property which allows to get and set the `source` attribute. It was designed to hold information on the source of the data if it came directly from an external database. Prior to `aiida-core` v1.0 it was possible to set this attribute directly through the constructor as a keyword argument, but due to the refactoring of the ORM, this was no longer possible. It went undetected because it was never tested and the two data types where this feature is typically used, the `UpfData` and `CifData`, implemented the keyword argument themselves, hiding the fact that `Data` no longer supported it. The `Data` constructor signature is updated to once again recognize the `source` keyword argument, and the custom implementation is removed from the `UpfData` and `CifData` plugins which now simply rely on the super class. A section is added to the "How to work with Data" section in the docs to document this feature and to recommend its usage when constructing data nodes by hand from external databases. --- aiida/orm/nodes/data/cif.py | 16 +-------- aiida/orm/nodes/data/data.py | 6 ++++ aiida/orm/nodes/data/upf.py | 13 ------- aiida/plugins/__init__.py | 1 + aiida/plugins/entry_point.py | 2 +- docs/source/howto/data.rst | 35 ++++++++++++++++++ docs/source/nitpick-exceptions | 1 + tests/orm/data/test_data.py | 66 ++++++++++++++++++++-------------- 8 files changed, 85 insertions(+), 55 deletions(-) diff --git a/aiida/orm/nodes/data/cif.py b/aiida/orm/nodes/data/cif.py index 8a419613f4..0f9a2e365b 100644 --- a/aiida/orm/nodes/data/cif.py +++ b/aiida/orm/nodes/data/cif.py @@ -262,17 +262,7 @@ class CifData(SinglefileData): _values = None _ase = None - def __init__( - self, - ase=None, - file=None, - filename=None, - values=None, - source=None, - scan_type=None, - parse_policy=None, - **kwargs - ): + def __init__(self, ase=None, file=None, filename=None, values=None, scan_type=None, parse_policy=None, **kwargs): """Construct a new instance and set the contents to that of the file. :param file: an absolute filepath or filelike object for CIF. @@ -280,7 +270,6 @@ def __init__( :param filename: specify filename to use (defaults to name of provided file). :param ase: ASE Atoms object to construct the CifData instance from. :param values: PyCifRW CifFile object to construct the CifData instance from. - :param source: :param scan_type: scan type string for parsing with PyCIFRW ('standard' or 'flex'). See CifFile.ReadCif :param parse_policy: 'eager' (parse CIF file on set_file) or 'lazy' (defer parsing until needed) """ @@ -301,9 +290,6 @@ def __init__( self.set_scan_type(scan_type or CifData._SCAN_TYPE_DEFAULT) self.set_parse_policy(parse_policy or CifData._PARSE_POLICY_DEFAULT) - if source is not None: - self.set_source(source) - if ase is not None: self.set_ase(ase) diff --git a/aiida/orm/nodes/data/data.py b/aiida/orm/nodes/data/data.py index 8b32aaef46..0c80acc188 100644 --- a/aiida/orm/nodes/data/data.py +++ b/aiida/orm/nodes/data/data.py @@ -42,6 +42,12 @@ class Data(Node): _storable = True _unstorable_message = 'storing for this node has been disabled' + def __init__(self, *args, source=None, **kwargs): + """Construct a new instance, setting the ``source`` attribute if provided as a keyword argument.""" + super().__init__(*args, **kwargs) + if source is not None: + self.source = source + def __copy__(self): """Copying a Data node is not supported, use copy.deepcopy or call Data.clone().""" raise exceptions.InvalidOperation('copying a Data node is not supported, use copy.deepcopy') diff --git a/aiida/orm/nodes/data/upf.py b/aiida/orm/nodes/data/upf.py index 70849d1f61..4ef8a2c4a4 100644 --- a/aiida/orm/nodes/data/upf.py +++ b/aiida/orm/nodes/data/upf.py @@ -253,19 +253,6 @@ def parse_upf(fname, check_filename=True): class UpfData(SinglefileData): """`Data` sub class to represent a pseudopotential single file in UPF format.""" - def __init__(self, file=None, filename=None, source=None, **kwargs): - """Create UpfData instance from pseudopotential file. - - :param file: filepath or filelike object of the UPF potential file to store. - Hint: Pass io.BytesIO(b"my string") to construct directly from a string. - :param filename: specify filename to use (defaults to name of provided file). - :param source: Dictionary with information on source of the potential (see ".source" property). - """ - # pylint: disable=redefined-builtin - super().__init__(file, filename=filename, **kwargs) - if source is not None: - self.set_source(source) - @classmethod def get_or_create(cls, filepath, use_first=False, store_upf=True): """Get the `UpfData` with the same md5 of the given file, or create it if it does not yet exist. diff --git a/aiida/plugins/__init__.py b/aiida/plugins/__init__.py index 66bd8fb14d..5c3e731676 100644 --- a/aiida/plugins/__init__.py +++ b/aiida/plugins/__init__.py @@ -31,6 +31,7 @@ 'SchedulerFactory', 'TransportFactory', 'WorkflowFactory', + 'get_entry_points', 'load_entry_point', 'load_entry_point_from_string', 'parse_entry_point', diff --git a/aiida/plugins/entry_point.py b/aiida/plugins/entry_point.py index 2e6629949c..97d8067010 100644 --- a/aiida/plugins/entry_point.py +++ b/aiida/plugins/entry_point.py @@ -21,7 +21,7 @@ from aiida.common.exceptions import LoadingEntryPointError, MissingEntryPointError, MultipleEntryPointError -__all__ = ('load_entry_point', 'load_entry_point_from_string', 'parse_entry_point') +__all__ = ('load_entry_point', 'load_entry_point_from_string', 'parse_entry_point', 'get_entry_points') ENTRY_POINT_GROUP_PREFIX = 'aiida.' ENTRY_POINT_STRING_SEPARATOR = ':' diff --git a/docs/source/howto/data.rst b/docs/source/howto/data.rst index 1954ce1251..d11cbd62c7 100644 --- a/docs/source/howto/data.rst +++ b/docs/source/howto/data.rst @@ -79,6 +79,41 @@ If none of the currently available data types, as listed by ``verdi plugin list` For details refer to the next section :ref:`"How to add support for custom data types"`. +.. _how-to:data:import:provenance: + +Provenance +---------- + +While AiiDA will automatically keep the provenance of data that is created by it through calculations and workflows, this is clearly not the case when creating data nodes manually, as described in the previous section. +Typically, the manual creation of data happens at the beginning of a project when data from external databases is imported as a starting point for further calculations. +To still keep some form of provenance, the :class:`~aiida.orm.nodes.data.Data` base class allows to record the _source_ of the data it contains. +When constructing a new data node, of any type, you can pass a dictionary with information of the source under the ``source`` keyword argument: + +.. code-block:: python + + data = Data(source={'uri': 'http://some.domain.org/files?id=12345', 'id': '12345'}) + +Once stored, this data can always be retrieved through the ``source`` property: + +.. code-block:: python + + data.source # Will return the ``source`` dictionary that was passed in the constructor, if any + +The following list shows all the keys that are allowed to be set in the ``source`` dictionary: + + * 'db_name': The name of the external database. + * 'db_uri': The base URI of the external database. + * 'uri': The exact URI of where the data can be retrieved. Ideally this is a persistent URI. + * 'id': The external ID with which the data is identified in the external database. + * 'version': The version of the data, if any. + * 'extras': Optional dictionary with other fields for source description. + * 'source_md5': MD5 checksum of the data. + * 'description': Human-readable free form description of the data's source. + * 'license': A string with the type of license that applies to the data, if any. + +If any other keys are defined, an exception will be raised by the constructor. + + .. _how-to:data:organize: Organizing data diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index b70c05da1f..99af999ad5 100644 --- a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -181,6 +181,7 @@ py:class Session py:class Query py:class BackendQueryBuilder py:class importlib_metadata.EntryPoint +py:class importlib_metadata.EntryPoints py:class Command py:class BooleanClauseList diff --git a/tests/orm/data/test_data.py b/tests/orm/data/test_data.py index d522871a1c..77499e6e19 100644 --- a/tests/orm/data/test_data.py +++ b/tests/orm/data/test_data.py @@ -7,24 +7,23 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Tests for the `Data` base class.""" - +# pylint: disable=redefined-outer-name +"""Tests for the :class:`aiida.orm.nodes.data.data:Data` class.""" import os import numpy import pytest -from aiida import orm -from aiida.backends.testbase import AiidaTestCase +from aiida import orm, plugins from tests.static import STATIC_DIR -class TestData(AiidaTestCase): - """Test for the `Data` base class.""" +@pytest.fixture +@pytest.mark.usefixtures('clear_database_before_test') +def generate_class_instance(): + """Generate a dummy `Data` instance for the given sub class.""" - @staticmethod - def generate_class_instance(data_class): - """Generate a dummy `Data` instance for the given sub class.""" + def _generate_class_instance(data_class): if data_class is orm.CifData: instance = data_class(file=os.path.join(STATIC_DIR, 'data', 'Si.cif')) return instance @@ -67,26 +66,41 @@ def generate_class_instance(data_class): 'for this data class, add a generator of a dummy instance here'.format(data_class) ) - # Tracked in issue #4281 - @pytest.mark.flaky(reruns=2) - def test_data_exporters(self): - """Verify that the return value of the export methods of all `Data` sub classes have the correct type. + return _generate_class_instance + + +@pytest.fixture(scope='function', params=plugins.get_entry_points('aiida.data')) +def data_plugin(request): + """Fixture that parametrizes over all the registered entry points of the ``aiida.data`` entry point group.""" + return request.param.load() + + +@pytest.mark.usefixtures('clear_database_before_test') +def test_constructor(): + """Test the constructor. + + Specifically, verify that the ``source`` attribute can be set through a keyword argument. + """ + source = {'id': 1} + node = orm.Data(source=source) + assert isinstance(node, orm.Data) + assert node.source == source - It should be a tuple where the first should be a byte string and the second a dictionary. - """ - from aiida.plugins.entry_point import get_entry_points - for entry_point in get_entry_points('aiida.data'): +@pytest.mark.usefixtures('clear_database_before_test') +def test_data_exporters(data_plugin, generate_class_instance): + """Verify that the return value of the export methods of all `Data` sub classes have the correct type. - data_class = entry_point.load() - export_formats = data_class.get_export_formats() + It should be a tuple where the first should be a byte string and the second a dictionary. + """ + export_formats = data_plugin.get_export_formats() - if not export_formats: - continue + if not export_formats: + return - instance = self.generate_class_instance(data_class) + instance = generate_class_instance(data_plugin) - for fileformat in export_formats: - content, dictionary = instance._exportcontent(fileformat) # pylint: disable=protected-access - self.assertIsInstance(content, bytes) - self.assertIsInstance(dictionary, dict) + for fileformat in export_formats: + content, dictionary = instance._exportcontent(fileformat) # pylint: disable=protected-access + assert isinstance(content, bytes) + assert isinstance(dictionary, dict)