From b647d13c4631401a0a61b7d78537c9a4473abf5d Mon Sep 17 00:00:00 2001
From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com>
Date: Mon, 16 Sep 2024 17:13:29 +0200
Subject: [PATCH] Add `metadata.validate_table` method for single table usage (#2225)

---
Review notes (placed after the `---` separator, so not part of the commit message):
* `validate_table` wraps `data` as `{table_name: data}` and delegates to `validate_data`.
* With `table_name=None` the name comes from `_get_single_table_name()`, called with
  warnings suppressed; a falsy result raises `InvalidMetadataError`.
* NOTE(review): the error text says "more than one table"; confirm a falsy name cannot
  also occur for metadata with no tables, where that message would be misleading.

 sdv/metadata/metadata.py             | 22 ++++++++++++++++++++++
 tests/unit/metadata/test_metadata.py | 27 +++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/sdv/metadata/metadata.py b/sdv/metadata/metadata.py
index d32284626..053ef5896 100644
--- a/sdv/metadata/metadata.py
+++ b/sdv/metadata/metadata.py
@@ -184,3 +184,25 @@ def set_sequence_key(self, table_name, column_name):
         """
         self._validate_table_exists(table_name)
         self.tables[table_name].set_sequence_key(column_name)
+
+    def validate_table(self, data, table_name=None):
+        """Validate a table against the metadata.
+
+        Args:
+            data (pandas.DataFrame):
+                Data to validate.
+            table_name (str):
+                Name of the table to validate. Optional if the metadata has one table.
+        """
+        if table_name is None:
+            with warnings.catch_warnings():
+                warnings.simplefilter('ignore')
+                table_name = self._get_single_table_name()
+
+        if not table_name:
+            raise InvalidMetadataError(
+                'Metadata contains more than one table, please specify the `table_name` '
+                'to validate.'
+            )
+
+        return self.validate_data({table_name: data})
diff --git a/tests/unit/metadata/test_metadata.py b/tests/unit/metadata/test_metadata.py
index a69ada211..1524c0757 100644
--- a/tests/unit/metadata/test_metadata.py
+++ b/tests/unit/metadata/test_metadata.py
@@ -4,6 +4,8 @@
 import pandas as pd
 import pytest
 
+from sdv.errors import InvalidDataError
+from sdv.metadata.errors import InvalidMetadataError
 from sdv.metadata.metadata import Metadata
 from tests.utils import DataFrameMatcher, get_multi_table_data, get_multi_table_metadata
 
@@ -551,6 +553,31 @@ def test_validate_data_no_relationships(self):
         metadata.validate_data(data)
         assert metadata.METADATA_SPEC_VERSION == 'V1'
 
+    def test_validate_table(self):
+        """Test the ``validate_table`` method."""
+        # Setup
+        metadata_multi_table = get_multi_table_metadata()
+        metadata_single_table = Metadata.load_from_dict(
+            metadata_multi_table.to_dict()['tables']['nesreca'], 'nesreca'
+        )
+        table = get_multi_table_data()['nesreca']
+
+        expected_error_wrong_name = re.escape(
+            'The provided data does not match the metadata:\n'
+            "The provided data is missing the tables {'nesreca'}."
+        )
+        expected_error_mutli_table = re.escape(
+            'Metadata contains more than one table, please specify the `table_name` to validate.'
+        )
+
+        # Run and Assert
+        metadata_single_table.validate_table(table)
+        metadata_single_table.validate_table(table, 'nesreca')
+        with pytest.raises(InvalidDataError, match=expected_error_wrong_name):
+            metadata_single_table.validate_table(table, 'wrong_name')
+        with pytest.raises(InvalidMetadataError, match=expected_error_mutli_table):
+            metadata_multi_table.validate_table(table)
+
     @patch('sdv.metadata.metadata.Metadata')
     def test_detect_from_dataframes(self, mock_metadata):
         """Test ``detect_from_dataframes``.