Skip to content

Commit

Permalink
Add a warning if you're loading a SingleTableMetadata object (#2224)
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Sep 23, 2024
1 parent 749ef31 commit ceb1c6c
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 22 deletions.
8 changes: 8 additions & 0 deletions sdv/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ def load_from_json(cls, filepath, single_table_name=None):
A ``Metadata`` instance.
"""
metadata = read_json(filepath)
if metadata.get('METADATA_SPEC_VERSION') == 'SINGLE_TABLE_V1':
single_table_name = single_table_name or cls.DEFAULT_SINGLE_TABLE_NAME
warnings.warn(
'You are loading an older SingleTableMetadata object. This will be converted into'
f" the new Metadata object with a placeholder table name ('{single_table_name}')."
' Please save this new object for future usage.'
)

return cls.load_from_dict(metadata, single_table_name)

@classmethod
Expand Down
35 changes: 35 additions & 0 deletions tests/integration/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,41 @@ def test_metadata():
assert instance.relationships == []


def test_load_from_json_single_table_metadata(tmp_path):
    """Check that a saved ``SingleTableMetadata`` file loads through ``Metadata``.

    A ``SingleTableMetadata`` object is written to a JSON file and read back with
    ``Metadata.load_from_json`` without passing a table name. The load must emit a
    ``UserWarning`` that mentions ``DEFAULT_TABLE_NAME``, and the resulting metadata
    must hold the columns under a table with that name.
    """
    # Setup
    json_path = tmp_path / 'metadata.json'
    legacy_metadata = SingleTableMetadata.load_from_dict({
        'columns': {
            'column_1': {'sdtype': 'numerical'},
            'column_2': {'sdtype': 'categorical'},
        },
    })
    legacy_metadata.save_to_json(json_path)

    warning_text = (
        'You are loading an older SingleTableMetadata object. This will be converted '
        f"into the new Metadata object with a placeholder table name ('{DEFAULT_TABLE_NAME}')."
        ' Please save this new object for future usage.'
    )
    # The message contains regex metacharacters (parentheses, dots), so escape it
    # before handing it to ``pytest.warns`` as a pattern.
    warning_pattern = re.escape(warning_text)

    # Run
    with pytest.warns(UserWarning, match=warning_pattern):
        loaded = Metadata.load_from_json(json_path)

    # Assert
    expected_dict = {
        'tables': {
            DEFAULT_TABLE_NAME: {
                'columns': {
                    'column_1': {'sdtype': 'numerical'},
                    'column_2': {'sdtype': 'categorical'},
                },
            },
        },
        'relationships': [],
        'METADATA_SPEC_VERSION': 'V1',
    }
    assert loaded.to_dict() == expected_dict


def test_detect_from_dataframes_multi_table():
"""Test the ``detect_from_dataframes`` method works with multi-table."""
# Setup
Expand Down
56 changes: 34 additions & 22 deletions tests/unit/metadata/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from unittest.mock import Mock, patch
import re
from unittest.mock import Mock, call, patch

import pandas as pd
import pytest
Expand Down Expand Up @@ -94,17 +95,12 @@ def test_load_from_json_path_does_not_exist(self, mock_path):
with pytest.raises(ValueError, match=error_msg):
Metadata.load_from_json('filepath.json')

@patch('sdv.metadata.utils.Path')
@patch('sdv.metadata.utils.json')
def test_load_from_json_single_table(self, mock_json, mock_path):
"""Test the ``load_from_json`` method.
Test that ``load_from_json`` function creates an instance with the contents returned by the
``json`` load function when passing in a single table metadata json.
@patch('sdv.metadata.metadata.read_json')
def test_load_from_json_single_table(self, mock_read_json):
"""Test the ``load_from_json`` method for single table metadata.
Mock:
- Mock the ``Path`` library in order to return ``True``.
- Mock the ``json`` library in order to use a custom return.
- Mock the ``read_json`` function in order to return a custom json.
Input:
- String representing a filepath.
Expand All @@ -114,26 +110,42 @@ def test_load_from_json_single_table(self, mock_json, mock_path):
file (``json.load`` return value)
"""
# Setup
instance = Metadata()
mock_path.return_value.exists.return_value = True
mock_path.return_value.name = 'filepath.json'
mock_json.load.return_value = {
mock_read_json.return_value = {
'columns': {'animals': {'type': 'categorical'}},
'primary_key': 'animals',
'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1',
}
warning_message = (
'You are loading an older SingleTableMetadata object. This will be converted'
" into the new Metadata object with a placeholder table name ('{}')."
' Please save this new object for future usage.'
)

expected_warning_with_table_name = re.escape(warning_message.format('filepath'))
expected_warning_without_table_name = re.escape(
warning_message.format('default_table_name')
)

# Run
instance = Metadata.load_from_json('filepath.json', 'filepath')
with pytest.warns(UserWarning, match=expected_warning_with_table_name):
instance_with_table_name = Metadata.load_from_json('filepath.json', 'filepath')
with pytest.warns(UserWarning, match=expected_warning_without_table_name):
instance_without_table_name = Metadata.load_from_json('filepath.json')

# Assert
assert list(instance.tables.keys()) == ['filepath']
assert instance.tables['filepath'].columns == {'animals': {'type': 'categorical'}}
assert instance.tables['filepath'].primary_key == 'animals'
assert instance.tables['filepath'].sequence_key is None
assert instance.tables['filepath'].alternate_keys == []
assert instance.tables['filepath'].sequence_index is None
assert instance.tables['filepath']._version == 'SINGLE_TABLE_V1'
mock_read_json.assert_has_calls([call('filepath.json'), call('filepath.json')])
table_name_to_instance = {
'filepath': instance_with_table_name,
'default_table_name': instance_without_table_name,
}
for table_name, instance in table_name_to_instance.items():
assert list(instance.tables.keys()) == [table_name]
assert instance.tables[table_name].columns == {'animals': {'type': 'categorical'}}
assert instance.tables[table_name].primary_key == 'animals'
assert instance.tables[table_name].sequence_key is None
assert instance.tables[table_name].alternate_keys == []
assert instance.tables[table_name].sequence_index is None
assert instance.tables[table_name]._version == 'SINGLE_TABLE_V1'

@patch('sdv.metadata.utils.Path')
@patch('sdv.metadata.utils.json')
Expand Down

0 comments on commit ceb1c6c

Please sign in to comment.