Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch on multi table combiner and test case #89

Merged
merged 2 commits into from
Dec 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions sdgx/data_models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ class Metadata(BaseModel):
column_list(list[str]): list of the column names in the table, other columns lists are used to store column information.
"""

# for primary key
# compatible with single primary key or composite primary key
primary_keys: List[str] = []
"""
primary_keys is used to store single primary key or composite primary key
"""

# variables related to columns
# column_list is used to store all columns' name
column_list: List[str] = []
"""
column_list is used to store all columns' name
"""

# other columns lists are used to store column information
# here are 5 basic data types
Expand All @@ -46,6 +48,9 @@ class Metadata(BaseModel):
# version info
metadata_version: str = "1.0"
_extend: Dict[str, Any] = {}
"""
For extended information, use ``get`` and ``set``
"""

def get(self, key: str, default=None) -> Any:
return getattr(self, key, getattr(self._extend, key, default))
Expand Down
34 changes: 16 additions & 18 deletions sdgx/data_models/multi_table_combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class MultiTableCombiner(BaseModel):
metadata_version: str = "1.0"

metadata_dict: Dict[str, Any] = {}
relationships: List[Any] = []
relationships: List[Relationship] = []

def check(self):
"""Do necessary checks:
Expand All @@ -35,26 +35,24 @@ def check(self):
if metadata_cnt != relationship_cnt + 1:
raise MultiTableCombinerError("Number of tables should corresponds to relationships.")

# table name check
table_names_from_relationships = set()
table_names = set(self.metadata_dict.keys())
relationship_parents = set(r.parent_table for r in self.relationships)
relationship_children = set(r.child_table for r in self.relationships)

# each relationship's table must have metadata
table_names = list(self.metadata_dict.keys())
for each_r in self.relationships:
if each_r.parent_table not in table_names:
raise MultiTableCombinerError(
f"Metadata of parent table {each_r.parent_table} is missing."
)
if each_r.child_table not in table_names:
raise MultiTableCombinerError(
f"Metadata of child table {each_r.child_table} is missing."
)
table_names_from_relationships.add(each_r.parent_table)
table_names_from_relationships.add(each_r.child_table)
if not table_names.issuperset(relationship_parents):
raise MultiTableCombinerError(
f"Relationships' parent table {relationship_parents - table_names} is missing."
)
if not table_names.issuperset(relationship_children):
raise MultiTableCombinerError(
f"Relationships' child table {relationship_children - table_names} is missing."
)

# each table in metadata must be in a relationship
for each_t in table_names:
if each_t not in table_names_from_relationships:
raise MultiTableCombinerError(f"Table {each_t} has not relationship.")
if not (relationship_parents + relationship_children).issuperset(table_names):
raise MultiTableCombinerError(
f"Table {table_names - (relationship_parents+relationship_children)} is missing in relationships."
)

logger.info("MultiTableCombiner check finished.")
4 changes: 2 additions & 2 deletions sdgx/data_models/relationship.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from sdgx.exceptions import RelationshipError
from sdgx.exceptions import RelationshipInitError


class Relationship(BaseModel):
Expand All @@ -26,4 +26,4 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)

if self.parent_table == self.child_table:
raise RelationshipError("child table and parent table cannot be the same")
raise RelationshipInitError("child table and parent table cannot be the same")
2 changes: 1 addition & 1 deletion sdgx/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class MetadataInvalidError(DataModelError):
ERROR_CODE = 9002


class RelationshipError(DataModelError):
class RelationshipInitError(DataModelError):
ERROR_CODE = 9003


Expand Down
5 changes: 3 additions & 2 deletions tests/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ def test_metadata(metadata: Metadata):
assert metadata.datetime_columns == metadata.get("datetime_columns")
assert metadata.bool_columns == metadata.get("bool_columns")
assert metadata.numeric_columns == metadata.get("numeric_columns")
assert metadata.set("a", 1) == metadata.get("a")
assert metadata.model_dump_json()


def test_metadata_save_load(metadata: Metadata):
test_path = Path("metadata_path_test.json")
def test_metadata_save_load(metadata: Metadata, tmp_path: Path):
test_path = tmp_path / "metadata_path_test.json"
metadata.save(test_path)
# load from path
new_meatadata = Metadata.load(test_path)
Expand Down