Skip to content

Commit

Permalink
Add metadata.validate_table method for single table usage (#2225)
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Sep 26, 2024
1 parent e2bb1d0 commit b647d13
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 0 deletions.
22 changes: 22 additions & 0 deletions sdv/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,25 @@ def set_sequence_key(self, table_name, column_name):
"""
self._validate_table_exists(table_name)
self.tables[table_name].set_sequence_key(column_name)

def validate_table(self, data, table_name=None):
    """Validate the data of one table against this metadata.

    If ``table_name`` is not given, the name is obtained from
    ``_get_single_table_name``; warnings emitted during that lookup are
    silenced. The data is then wrapped as ``{table_name: data}`` and handed
    to ``validate_data``.

    Args:
        data (pandas.DataFrame):
            Data to validate.
        table_name (str, optional):
            Name of the table to validate. Defaults to ``None``.

    Returns:
        The result of ``validate_data`` for the single-entry mapping.

    Raises:
        InvalidMetadataError:
            If the resolved table name is falsy.
    """
    resolved_name = table_name
    if resolved_name is None:
        # The lookup may warn when it cannot pick a table; that case is
        # reported through the exception below instead.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            resolved_name = self._get_single_table_name()

    if resolved_name:
        return self.validate_data({resolved_name: data})

    raise InvalidMetadataError(
        'Metadata contains more than one table, please specify the `table_name` '
        'to validate.'
    )
27 changes: 27 additions & 0 deletions tests/unit/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pandas as pd
import pytest

from sdv.errors import InvalidDataError
from sdv.metadata.errors import InvalidMetadataError
from sdv.metadata.metadata import Metadata
from tests.utils import DataFrameMatcher, get_multi_table_data, get_multi_table_metadata

Expand Down Expand Up @@ -551,6 +553,31 @@ def test_validate_data_no_relationships(self):
metadata.validate_data(data)
assert metadata.METADATA_SPEC_VERSION == 'V1'

def test_validate_table(self):
    """Test the ``validate_table`` method.

    Checks that a single-table metadata accepts the table with and without
    an explicit name, that an unknown name raises ``InvalidDataError``, and
    that a multi-table metadata raises ``InvalidMetadataError`` when no
    name is given.
    """
    # Setup
    table_name = 'nesreca'
    multi_table_metadata = get_multi_table_metadata()
    single_table_dict = multi_table_metadata.to_dict()['tables'][table_name]
    single_table_metadata = Metadata.load_from_dict(single_table_dict, table_name)
    table_data = get_multi_table_data()[table_name]

    wrong_name_pattern = re.escape(
        'The provided data does not match the metadata:\n'
        "The provided data is missing the tables {'nesreca'}."
    )
    multi_table_pattern = re.escape(
        'Metadata contains more than one table, please specify the `table_name` to validate.'
    )

    # Run and Assert
    single_table_metadata.validate_table(table_data)
    single_table_metadata.validate_table(table_data, table_name)

    with pytest.raises(InvalidDataError, match=wrong_name_pattern):
        single_table_metadata.validate_table(table_data, 'wrong_name')

    with pytest.raises(InvalidMetadataError, match=multi_table_pattern):
        multi_table_metadata.validate_table(table_data)

@patch('sdv.metadata.metadata.Metadata')
def test_detect_from_dataframes(self, mock_metadata):
"""Test ``detect_from_dataframes``.
Expand Down

0 comments on commit b647d13

Please sign in to comment.