From b8cdb1de293b41a72835564c75c61b56c150219a Mon Sep 17 00:00:00 2001
From: John La
Date: Wed, 17 Jul 2024 11:47:50 -0500
Subject: [PATCH] Add Metadata Class (#2135)

---
 sdv/metadata/__init__.py                    |   2 +
 sdv/metadata/metadata.py                    |  81 +++
 tests/integration/metadata/test_metadata.py | 218 ++++++++
 tests/unit/metadata/test_metadata.py        | 545 ++++++++++++++++++++
 4 files changed, 846 insertions(+)
 create mode 100644 sdv/metadata/metadata.py
 create mode 100644 tests/integration/metadata/test_metadata.py
 create mode 100644 tests/unit/metadata/test_metadata.py

diff --git a/sdv/metadata/__init__.py b/sdv/metadata/__init__.py
index 71d689727..5d0ca7e1e 100644
--- a/sdv/metadata/__init__.py
+++ b/sdv/metadata/__init__.py
@@ -4,9 +4,11 @@
 from sdv.metadata.errors import InvalidMetadataError, MetadataNotFittedError
+from sdv.metadata.metadata import Metadata
 from sdv.metadata.multi_table import MultiTableMetadata
 from sdv.metadata.single_table import SingleTableMetadata
 
 __all__ = (
     'InvalidMetadataError',
+    'Metadata',
     'MetadataNotFittedError',
     'MultiTableMetadata',
     'SingleTableMetadata',
diff --git a/sdv/metadata/metadata.py b/sdv/metadata/metadata.py
new file mode 100644
index 000000000..1b628c9b9
--- /dev/null
+++ b/sdv/metadata/metadata.py
@@ -0,0 +1,81 @@
+"""Metadata."""
+
+from pathlib import Path
+
+from sdv.metadata.multi_table import MultiTableMetadata
+from sdv.metadata.single_table import SingleTableMetadata
+from sdv.metadata.utils import read_json
+
+
+class Metadata(MultiTableMetadata):
+    """Metadata class that handles all metadata."""
+
+    METADATA_SPEC_VERSION = 'V1'
+    DEFAULT_SINGLE_TABLE_NAME = 'default_table_name'
+
+    @classmethod
+    def load_from_json(cls, filepath):
+        """Create a ``Metadata`` instance from a ``json`` file.
+
+        If the file holds single-table metadata, the file name (without its
+        extension) is used as the name of the table.
+
+        Args:
+            filepath (str):
+                String that represents the ``path`` to the ``json`` file.
+
+        Raises:
+            - A ``ValueError`` if the path does not exist.
+
+        Returns:
+            A ``Metadata`` instance.
+        """
+        filename = Path(filepath).stem
+        metadata = read_json(filepath)
+        return cls.load_from_dict(metadata, filename)
+
+    @classmethod
+    def load_from_dict(cls, metadata_dict, single_table_name=None):
+        """Create a ``Metadata`` instance from a python ``dict``.
+
+        Args:
+            metadata_dict (dict):
+                Python dictionary representing a ``MultiTableMetadata``
+                or ``SingleTableMetadata`` object.
+            single_table_name (str):
+                If the python dictionary represents a ``SingleTableMetadata`` then
+                this arg is used for the name of the table. Defaults to ``None``,
+                in which case ``DEFAULT_SINGLE_TABLE_NAME`` is used.
+
+        Returns:
+            Instance of ``Metadata``.
+        """
+        instance = cls()
+        instance._set_metadata_dict(metadata_dict, single_table_name)
+        return instance
+
+    def _set_metadata_dict(self, metadata, single_table_name=None):
+        """Set a ``metadata`` dictionary to the current instance.
+
+        Checks to see if the metadata is in the ``SingleTableMetadata`` or
+        ``MultiTableMetadata`` format and converts it to a standard
+        ``MultiTableMetadata`` format if necessary.
+
+        Args:
+            metadata (dict):
+                Python dictionary representing a ``MultiTableMetadata`` or
+                ``SingleTableMetadata`` object.
+            single_table_name (str):
+                Name given to the table when ``metadata`` is in the
+                ``SingleTableMetadata`` format. Defaults to ``None``, in which case
+                ``DEFAULT_SINGLE_TABLE_NAME`` is used.
+        """
+        # The multi-table format is recognised by its top-level ``tables`` key.
+        if 'tables' in metadata:
+            super()._set_metadata_dict(metadata)
+            return
+
+        if single_table_name is None:
+            single_table_name = self.DEFAULT_SINGLE_TABLE_NAME
+
+        self.tables[single_table_name] = SingleTableMetadata.load_from_dict(metadata)
diff --git a/tests/integration/metadata/test_metadata.py b/tests/integration/metadata/test_metadata.py
new file mode 100644
index 000000000..6adde98ed
--- /dev/null
+++ b/tests/integration/metadata/test_metadata.py
@@ -0,0 +1,218 @@
+from sdv.datasets.demo import download_demo
+from sdv.metadata.metadata import Metadata
+
+
+def test_metadata():
+    """Test ``Metadata``."""
+    # Create an instance
+    instance = Metadata()
+
+    # To dict
+    result = instance.to_dict()
+
+    # Assert
+    assert result == {'tables': {}, 'relationships': [], 'METADATA_SPEC_VERSION': 'V1'}
+    assert instance.tables == {}
+    assert instance.relationships == []
+
+
+def test_detect_from_dataframes_multi_table():
+    """Test the ``detect_from_dataframes`` method works with multi-table."""
+    # Setup
+    real_data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')
+
+    metadata = Metadata()
+
+    # Run
+    metadata.detect_from_dataframes(real_data)
+
+    # Assert
+    metadata.update_column(
+        table_name='hotels',
+        column_name='classification',
+        sdtype='categorical',
+    )
+
+    expected_metadata = {
+        'tables': {
+            'hotels': {
+                'columns': {
+                    'hotel_id': {'sdtype': 'id'},
+                    'city': {'sdtype': 'city', 'pii': True},
+                    'state': {'sdtype': 'administrative_unit', 'pii': True},
+                    'rating': {'sdtype': 'numerical'},
+                    'classification': {'sdtype': 'categorical'},
+                },
+                'primary_key': 'hotel_id',
+            },
+            'guests': {
+                'columns': {
+                    'guest_email': {'sdtype': 'email', 'pii': True},
+                    'hotel_id': {'sdtype': 'id'},
+                    'has_rewards': {'sdtype': 'categorical'},
+                    'room_type': {'sdtype': 'categorical'},
+                    'amenities_fee': {'sdtype': 'numerical'},
+                    'checkin_date': {'sdtype': 'datetime', 'datetime_format': '%d %b %Y'},
+                    'checkout_date': {'sdtype': 'datetime', 'datetime_format': '%d %b %Y'},
+                    'room_rate': {'sdtype': 'numerical'},
+                    'billing_address': {'sdtype': 'unknown', 'pii': True},
+                    'credit_card_number': {'sdtype': 'credit_card_number', 'pii': True},
+                },
+                'primary_key': 'guest_email',
+            },
+        },
+        'relationships': [
+            {
+                'parent_table_name': 'hotels',
+                'child_table_name': 'guests',
+                'parent_primary_key': 'hotel_id',
+                'child_foreign_key': 'hotel_id',
+            }
+        ],
+        'METADATA_SPEC_VERSION': 'V1',
+    }
+    assert metadata.to_dict() == expected_metadata
+
+
+def test_detect_from_dataframes_single_table():
+    """Test the ``detect_from_dataframes`` method works with a single table."""
+    # Setup
+    data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')
+
+    metadata = Metadata()
+    metadata.detect_from_dataframes({'table_1': data['hotels']})
+
+    # Run
+    metadata.validate()
+
+    # Assert
+    expected_metadata = {
+        'METADATA_SPEC_VERSION': 'V1',
+        'tables': {
+            'table_1': {
+                'columns': {
+                    'hotel_id': {'sdtype': 'id'},
+                    'city': {'sdtype': 'city', 'pii': True},
+                    'state': {'sdtype': 'administrative_unit', 'pii': True},
+                    'rating': {'sdtype': 'numerical'},
+                    'classification': {'sdtype': 'unknown', 'pii': True},
+                },
+                'primary_key': 'hotel_id',
+            }
+        },
+        'relationships': [],
+    }
+    assert metadata.to_dict() == expected_metadata
+
+
+def test_detect_from_csvs(tmp_path):
+    """Test the ``detect_from_csvs`` method."""
+    # Setup
+    real_data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')
+
+    metadata = Metadata()
+
+    for table_name, dataframe in real_data.items():
+        csv_path = tmp_path / f'{table_name}.csv'
+        dataframe.to_csv(csv_path, index=False)
+
+    # Run
+    metadata.detect_from_csvs(folder_name=tmp_path)
+
+    # Assert
+    metadata.update_column(
+        table_name='hotels',
+        column_name='classification',
+        sdtype='categorical',
+    )
+
+    expected_metadata = {
+        'tables': {
+            'hotels': {
+                'columns': {
+                    'hotel_id': {'sdtype': 'id'},
+                    'city': {'sdtype': 'city', 'pii': True},
+                    'state': {'sdtype': 'administrative_unit', 'pii': True},
+                    'rating': {'sdtype': 'numerical'},
+                    'classification': {'sdtype': 'categorical'},
+                },
+                'primary_key': 'hotel_id',
+            },
+            'guests': {
+                'columns': {
+                    'guest_email': {'sdtype': 'email', 'pii': True},
+                    'hotel_id': {'sdtype': 'id'},
+                    'has_rewards': {'sdtype': 'categorical'},
+                    'room_type': {'sdtype': 'categorical'},
+                    'amenities_fee': {'sdtype': 'numerical'},
+                    'checkin_date': {'sdtype': 'datetime', 'datetime_format': '%d %b %Y'},
+                    'checkout_date': {'sdtype': 'datetime', 'datetime_format': '%d %b %Y'},
+                    'room_rate': {'sdtype': 'numerical'},
+                    'billing_address': {'sdtype': 'unknown', 'pii': True},
+                    'credit_card_number': {'sdtype': 'credit_card_number', 'pii': True},
+                },
+                'primary_key': 'guest_email',
+            },
+        },
+        'relationships': [
+            {
+                'parent_table_name': 'hotels',
+                'child_table_name': 'guests',
+                'parent_primary_key': 'hotel_id',
+                'child_foreign_key': 'hotel_id',
+            }
+        ],
+        'METADATA_SPEC_VERSION': 'V1',
+    }
+
+    assert metadata.to_dict() == expected_metadata
+
+
+def test_detect_table_from_csv(tmp_path):
+    """Test the ``detect_table_from_csv`` method."""
+    # Setup
+    real_data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')
+
+    metadata = Metadata()
+
+    for table_name, dataframe in real_data.items():
+        csv_path = tmp_path / f'{table_name}.csv'
+        dataframe.to_csv(csv_path, index=False)
+
+    # Run
+    metadata.detect_table_from_csv('hotels', tmp_path / 'hotels.csv')
+
+    # Assert
+    metadata.update_column(
+        table_name='hotels',
+        column_name='city',
+        sdtype='categorical',
+    )
+    metadata.update_column(
+        table_name='hotels',
+        column_name='state',
+        sdtype='categorical',
+    )
+    metadata.update_column(
+        table_name='hotels',
+        column_name='classification',
+        sdtype='categorical',
+    )
+    expected_metadata = {
+        'tables': {
+            'hotels': {
+                'columns': {
+                    'hotel_id': {'sdtype': 'id'},
+                    'city': {'sdtype': 'categorical'},
+                    'state': {'sdtype': 'categorical'},
+                    'rating': {'sdtype': 'numerical'},
+                    'classification': {'sdtype': 'categorical'},
+                },
+                'primary_key': 'hotel_id',
+            }
+        },
+        'relationships': [],
+        'METADATA_SPEC_VERSION': 'V1',
+    }
+
+    assert metadata.to_dict() == expected_metadata
diff --git a/tests/unit/metadata/test_metadata.py b/tests/unit/metadata/test_metadata.py
new file mode 100644
index 000000000..284bad8d6
--- /dev/null
+++ b/tests/unit/metadata/test_metadata.py
@@ -0,0 +1,545 @@
+from unittest.mock import patch
+
+import pytest
+
+from sdv.metadata.metadata import Metadata
+from tests.utils import get_multi_table_data, get_multi_table_metadata
+
+
+class TestMetadataClass:
+    """Test ``Metadata`` class."""
+
+    def get_multi_table_metadata(self):
+        """Set the tables and relationships for metadata."""
+        metadata = {}
+        metadata['tables'] = {
+            'users': {
+                'columns': {'id': {'sdtype': 'id'}, 'country': {'sdtype': 'categorical'}},
+                'primary_key': 'id',
+            },
+            'payments': {
+                'columns': {
+                    'payment_id': {'sdtype': 'id'},
+                    'user_id': {'sdtype': 'id'},
+                    'date': {'sdtype': 'datetime'},
+                },
+                'primary_key': 'payment_id',
+            },
+            'sessions': {
+                'columns': {
+                    'session_id': {'sdtype': 'id'},
+                    'user_id': {'sdtype': 'id'},
+                    'device': {'sdtype': 'categorical'},
+                },
+                'primary_key': 'session_id',
+            },
+            'transactions': {
+                'columns': {
+                    'transaction_id': {'sdtype': 'id'},
+                    'session_id': {'sdtype': 'id'},
+                    'timestamp': {'sdtype': 'datetime'},
+                },
+                'primary_key': 'transaction_id',
+            },
+        }
+
+        metadata['relationships'] = [
+            {
+                'parent_table_name': 'users',
+                'parent_primary_key': 'id',
+                'child_table_name': 'sessions',
+                'child_foreign_key': 'user_id',
+            },
+            {
+                'parent_table_name': 'sessions',
+                'parent_primary_key': 'session_id',
+                'child_table_name': 'transactions',
+                'child_foreign_key': 'session_id',
+            },
+            {
+                'parent_table_name': 'users',
+                'parent_primary_key': 'id',
+                'child_table_name': 'payments',
+                'child_foreign_key': 'user_id',
+            },
+        ]
+
+        return Metadata.load_from_dict(metadata)
+
+    @patch('sdv.metadata.utils.Path')
+    def test_load_from_json_path_does_not_exist(self, mock_path):
+        """Test the ``load_from_json`` method.
+
+        Test that the method raises a ``ValueError`` when the specified path does not
+        exist.
+
+        Mock:
+            - Mock the ``Path`` library in order to return ``False``, that the file does not exist.
+
+        Input:
+            - String representing a filepath.
+
+        Side Effects:
+            - A ``ValueError`` is raised pointing that the ``file`` does not exist.
+        """
+        # Setup
+        mock_path.return_value.exists.return_value = False
+        mock_path.return_value.name = 'filepath.json'
+
+        # Run / Assert
+        error_msg = (
+            "A file named 'filepath.json' does not exist. Please specify a different filename."
+        )
+        with pytest.raises(ValueError, match=error_msg):
+            Metadata.load_from_json('filepath.json')
+
+    @patch('sdv.metadata.utils.Path')
+    @patch('sdv.metadata.utils.json')
+    def test_load_from_json_single_table(self, mock_json, mock_path):
+        """Test the ``load_from_json`` method.
+
+        Test that ``load_from_json`` function creates an instance with the contents returned by the
+        ``json`` load function when passing in a single table metadata json.
+
+        Mock:
+            - Mock the ``Path`` library in order to return ``True``.
+            - Mock the ``json`` library in order to use a custom return.
+
+        Input:
+            - String representing a filepath.
+
+        Output:
+            - ``Metadata`` instance holding a single table, named after the file, with the
+              custom configuration from the ``json`` file (``json.load`` return value)
+        """
+        # Setup
+        mock_path.return_value.exists.return_value = True
+        mock_path.return_value.name = 'filepath.json'
+        mock_json.load.return_value = {
+            'columns': {'animals': {'type': 'categorical'}},
+            'primary_key': 'animals',
+            'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1',
+        }
+
+        # Run
+        instance = Metadata.load_from_json('filepath.json')
+
+        # Assert
+        assert list(instance.tables.keys()) == ['filepath']
+        assert instance.tables['filepath'].columns == {'animals': {'type': 'categorical'}}
+        assert instance.tables['filepath'].primary_key == 'animals'
+        assert instance.tables['filepath'].sequence_key is None
+        assert instance.tables['filepath'].alternate_keys == []
+        assert instance.tables['filepath'].sequence_index is None
+        assert instance.tables['filepath']._version == 'SINGLE_TABLE_V1'
+        assert instance.relationships == []
+
+    @patch('sdv.metadata.utils.Path')
+    @patch('sdv.metadata.utils.json')
+    def test_load_from_json_multi_table(self, mock_json, mock_path):
+        """Test the ``load_from_json`` method.
+
+        Test that ``load_from_json`` function creates an instance with the contents returned by the
+        ``json`` load function when passing in a multi-table metadata json.
+
+        Mock:
+            - Mock the ``Path`` library in order to return ``True``.
+            - Mock the ``json`` library in order to use a custom return.
+
+        Input:
+            - String representing a filepath.
+
+        Output:
+            - ``Metadata`` instance with the custom configuration from the ``json``
+              file (``json.load`` return value)
+        """
+        # Setup
+        mock_path.return_value.exists.return_value = True
+        mock_path.return_value.name = 'filepath.json'
+        mock_json.load.return_value = {
+            'tables': {
+                'table1': {
+                    'columns': {'animals': {'type': 'categorical'}},
+                    'primary_key': 'animals',
+                    'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1',
+                }
+            },
+            'relationships': [],
+        }
+
+        # Run
+        instance = Metadata.load_from_json('filepath.json')
+
+        # Asserts
+        assert list(instance.tables.keys()) == ['table1']
+        assert instance.tables['table1'].columns == {'animals': {'type': 'categorical'}}
+        assert instance.tables['table1'].primary_key == 'animals'
+        assert instance.tables['table1'].sequence_key is None
+        assert instance.tables['table1'].alternate_keys == []
+        assert instance.tables['table1'].sequence_index is None
+        assert instance.tables['table1']._version == 'SINGLE_TABLE_V1'
+        assert instance.relationships == []
+
+    @patch('sdv.metadata.multi_table.SingleTableMetadata')
+    def test_load_from_dict_multi_table(self, mock_singletablemetadata):
+        """Test that ``load_from_dict`` returns an instance of multi-table ``Metadata``.
+
+        Test that when calling the ``load_from_dict`` method a new instance with the passed
+        python ``dict`` details should be created.
+
+        Setup:
+            - A dict representing a multi-table ``Metadata``.
+
+        Mock:
+            - Mock ``SingleTableMetadata`` from ``sdv.metadata.multi_table``
+
+        Output:
+            - ``instance`` that contains ``instance.tables`` and ``instance.relationships``.
+
+        Side Effects:
+            - ``SingleTableMetadata.load_from_dict`` has been called.
+        """
+        # Setup
+        multitable_metadata = {
+            'tables': {
+                'accounts': {
+                    'id': {'sdtype': 'numerical'},
+                    'branch_id': {'sdtype': 'numerical'},
+                    'amount': {'sdtype': 'numerical'},
+                    'start_date': {'sdtype': 'datetime'},
+                    'owner': {'sdtype': 'id'},
+                },
+                'branches': {
+                    'id': {'sdtype': 'numerical'},
+                    'name': {'sdtype': 'id'},
+                },
+            },
+            'relationships': [
+                {
+                    'parent_table_name': 'accounts',
+                    'parent_primary_key': 'id',
+                    'child_table_name': 'branches',
+                    'child_foreign_key': 'branch_id',
+                }
+            ],
+        }
+
+        single_table_accounts = object()
+        single_table_branches = object()
+        mock_singletablemetadata.load_from_dict.side_effect = [
+            single_table_accounts,
+            single_table_branches,
+        ]
+
+        # Run
+        instance = Metadata.load_from_dict(multitable_metadata)
+
+        # Assert
+        assert instance.tables == {
+            'accounts': single_table_accounts,
+            'branches': single_table_branches,
+        }
+
+        assert instance.relationships == [
+            {
+                'parent_table_name': 'accounts',
+                'parent_primary_key': 'id',
+                'child_table_name': 'branches',
+                'child_foreign_key': 'branch_id',
+            }
+        ]
+
+    @patch('sdv.metadata.multi_table.SingleTableMetadata')
+    def test_load_from_dict_integer_multi_table(self, mock_singletablemetadata):
+        """Test that ``load_from_dict`` returns an instance of multi-table ``Metadata``.
+
+        Test that when calling the ``load_from_dict`` method a new instance with the passed
+        python ``dict`` details should be created. Make sure that integers passed in are
+        turned into strings to ensure metadata is properly typed.
+
+        Setup:
+            - A dict representing a multi-table ``Metadata``.
+
+        Mock:
+            - Mock ``SingleTableMetadata`` from ``sdv.metadata.multi_table``
+
+        Output:
+            - ``instance`` that contains ``instance.tables`` and ``instance.relationships``.
+
+        Side Effects:
+            - ``SingleTableMetadata.load_from_dict`` has been called.
+        """
+        # Setup
+        multitable_metadata = {
+            'tables': {
+                'accounts': {
+                    1: {'sdtype': 'numerical'},
+                    2: {'sdtype': 'numerical'},
+                    'amount': {'sdtype': 'numerical'},
+                    'start_date': {'sdtype': 'datetime'},
+                    'owner': {'sdtype': 'id'},
+                },
+                'branches': {
+                    1: {'sdtype': 'numerical'},
+                    'name': {'sdtype': 'id'},
+                },
+            },
+            'relationships': [
+                {
+                    'parent_table_name': 'accounts',
+                    'parent_primary_key': 1,
+                    'child_table_name': 'branches',
+                    'child_foreign_key': 1,
+                }
+            ],
+        }
+
+        single_table_accounts = {
+            '1': {'sdtype': 'numerical'},
+            '2': {'sdtype': 'numerical'},
+            'amount': {'sdtype': 'numerical'},
+            'start_date': {'sdtype': 'datetime'},
+            'owner': {'sdtype': 'id'},
+        }
+        single_table_branches = {
+            '1': {'sdtype': 'numerical'},
+            'name': {'sdtype': 'id'},
+        }
+        mock_singletablemetadata.load_from_dict.side_effect = [
+            single_table_accounts,
+            single_table_branches,
+        ]
+
+        # Run
+        instance = Metadata.load_from_dict(multitable_metadata)
+
+        # Assert
+        assert instance.tables == {
+            'accounts': single_table_accounts,
+            'branches': single_table_branches,
+        }
+
+        assert instance.relationships == [
+            {
+                'parent_table_name': 'accounts',
+                'parent_primary_key': '1',
+                'child_table_name': 'branches',
+                'child_foreign_key': '1',
+            }
+        ]
+
+    def test_load_from_dict_single_table(self):
+        """Test that ``load_from_dict`` returns an instance of single-table ``Metadata``.
+
+        Test that when calling the ``load_from_dict`` method a new instance with the passed
+        python ``dict`` details should be created.
+        """
+        # Setup
+        my_metadata = {
+            'columns': {'my_column': 'value'},
+            'primary_key': 'pk',
+            'alternate_keys': [],
+            'sequence_key': None,
+            'sequence_index': None,
+            'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1',
+        }
+
+        # Run
+        instance = Metadata.load_from_dict(my_metadata)
+
+        # Assert
+        assert list(instance.tables.keys()) == ['default_table_name']
+        assert instance.tables['default_table_name'].columns == {'my_column': 'value'}
+        assert instance.tables['default_table_name'].primary_key == 'pk'
+        assert instance.tables['default_table_name'].sequence_key is None
+        assert instance.tables['default_table_name'].alternate_keys == []
+        assert instance.tables['default_table_name'].sequence_index is None
+        assert instance.tables['default_table_name']._version == 'SINGLE_TABLE_V1'
+
+    def test_load_from_dict_integer_single_table(self):
+        """Test that ``load_from_dict`` returns an instance of single-table ``Metadata``.
+
+        Test that when calling the ``load_from_dict`` method a new instance with the passed
+        python ``dict`` details should be created. Make sure that integers passed in are
+        turned into strings to ensure metadata is properly typed.
+        """
+        # Setup
+        my_metadata = {
+            'columns': {1: 'value'},
+            'primary_key': 'pk',
+            'alternate_keys': [],
+            'sequence_key': None,
+            'sequence_index': None,
+            'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1',
+        }
+
+        # Run
+        instance = Metadata.load_from_dict(my_metadata)
+
+        # Assert
+        assert list(instance.tables.keys()) == ['default_table_name']
+        assert instance.tables['default_table_name'].columns == {'1': 'value'}
+        assert instance.tables['default_table_name'].primary_key == 'pk'
+        assert instance.tables['default_table_name'].sequence_key is None
+        assert instance.tables['default_table_name'].alternate_keys == []
+        assert instance.tables['default_table_name'].sequence_index is None
+        assert instance.tables['default_table_name']._version == 'SINGLE_TABLE_V1'
+
+    @patch('sdv.metadata.multi_table.SingleTableMetadata')
+    def test__set_metadata_multi_table(self, mock_singletablemetadata):
+        """Test the ``_set_metadata_dict`` method for ``Metadata``.
+
+        Setup:
+            - instance of ``Metadata``.
+            - A dict representing a ``MultiTableMetadata``.
+
+        Mock:
+            - Mock ``SingleTableMetadata`` from ``sdv.metadata.multi_table``
+
+        Side Effects:
+            - ``instance`` now contains ``instance.tables`` and ``instance.relationships``.
+            - ``SingleTableMetadata.load_from_dict`` has been called.
+        """
+        # Setup
+        multitable_metadata = {
+            'tables': {
+                'accounts': {
+                    'id': {'sdtype': 'numerical'},
+                    'branch_id': {'sdtype': 'numerical'},
+                    'amount': {'sdtype': 'numerical'},
+                    'start_date': {'sdtype': 'datetime'},
+                    'owner': {'sdtype': 'id'},
+                },
+                'branches': {
+                    'id': {'sdtype': 'numerical'},
+                    'name': {'sdtype': 'id'},
+                },
+            },
+            'relationships': [
+                {
+                    'parent_table_name': 'accounts',
+                    'parent_primary_key': 'id',
+                    'child_table_name': 'branches',
+                    'child_foreign_key': 'branch_id',
+                }
+            ],
+        }
+
+        single_table_accounts = object()
+        single_table_branches = object()
+        mock_singletablemetadata.load_from_dict.side_effect = [
+            single_table_accounts,
+            single_table_branches,
+        ]
+
+        instance = Metadata()
+
+        # Run
+        instance._set_metadata_dict(multitable_metadata)
+
+        # Assert
+        assert instance.tables == {
+            'accounts': single_table_accounts,
+            'branches': single_table_branches,
+        }
+
+        assert instance.relationships == [
+            {
+                'parent_table_name': 'accounts',
+                'parent_primary_key': 'id',
+                'child_table_name': 'branches',
+                'child_foreign_key': 'branch_id',
+            }
+        ]
+
+    def test__set_metadata_single_table(self):
+        """Test the ``_set_metadata_dict`` method with a single-table dict.
+
+        Setup:
+            - instance of ``Metadata``.
+            - A dict representing a ``SingleTableMetadata``.
+
+        Input:
+            - The single-table dict, without a ``single_table_name``.
+
+        Side Effects:
+            - ``instance.tables`` holds the table under ``'default_table_name'``.
+        """
+        # Setup
+        single_table_metadata = {
+            'columns': {'my_column': 'value'},
+            'primary_key': 'pk',
+            'alternate_keys': [],
+            'sequence_key': None,
+            'sequence_index': None,
+            'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1',
+        }
+
+        instance = Metadata()
+
+        # Run
+        instance._set_metadata_dict(single_table_metadata)
+
+        # Assert
+        assert instance.tables['default_table_name'].columns == {'my_column': 'value'}
+        assert instance.tables['default_table_name'].primary_key == 'pk'
+        assert instance.tables['default_table_name'].alternate_keys == []
+        assert instance.tables['default_table_name'].sequence_key is None
+        assert instance.tables['default_table_name'].sequence_index is None
+        assert instance.tables['default_table_name']._version == 'SINGLE_TABLE_V1'
+
+    def test_validate(self):
+        """Test the method ``validate``.
+
+        Test that when a valid ``Metadata`` has been provided no errors are being raised.
+
+        Setup:
+            - Instance of ``Metadata`` with all valid tables and relationships.
+        """
+        # Setup
+        instance = self.get_multi_table_metadata()
+
+        # Run
+        instance.validate()
+
+    def test_validate_no_relationships(self):
+        """Test the method ``validate`` without relationships.
+
+        Test that when a valid ``Metadata`` has been provided no errors are being raised.
+
+        Setup:
+            - Instance of ``Metadata`` with all valid tables and no relationships.
+        """
+        # Setup
+        metadata = self.get_multi_table_metadata()
+        metadata_no_relationships = metadata.to_dict()
+        del metadata_no_relationships['relationships']
+        test_metadata = Metadata.load_from_dict(metadata_no_relationships)
+
+        # Run
+        test_metadata.validate()
+        assert test_metadata.METADATA_SPEC_VERSION == 'V1'
+
+    def test_validate_data(self):
+        """Test that no error is being raised when the data is valid."""
+        # Setup
+        metadata_dict = get_multi_table_metadata().to_dict()
+        metadata = Metadata.load_from_dict(metadata_dict)
+        data = get_multi_table_data()
+
+        # Run and Assert
+        metadata.validate_data(data)
+        assert metadata.METADATA_SPEC_VERSION == 'V1'
+
+    def test_validate_data_no_relationships(self):
+        """Test that no error is being raised when the data is valid but has no relationships."""
+        # Setup
+        metadata_dict = get_multi_table_metadata().to_dict()
+        del metadata_dict['relationships']
+        del metadata_dict['METADATA_SPEC_VERSION']
+        metadata = Metadata.load_from_dict(metadata_dict)
+        data = get_multi_table_data()
+
+        # Run and Assert
+        metadata.validate_data(data)
+        assert metadata.METADATA_SPEC_VERSION == 'V1'