From 94c3c7007dc78f9533c6e117b2ca39c7ec5c79ae Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Wed, 27 Dec 2023 19:05:37 +0800 Subject: [PATCH 1/2] Patch on multi table combiner and test case --- sdgx/data_models/multi_table_combiner.py | 34 +++++++++++------------- sdgx/data_models/relationship.py | 4 +-- sdgx/exceptions.py | 2 +- tests/metadata/test_metadata.py | 5 ++-- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/sdgx/data_models/multi_table_combiner.py b/sdgx/data_models/multi_table_combiner.py index 6130c492..f6b773ed 100644 --- a/sdgx/data_models/multi_table_combiner.py +++ b/sdgx/data_models/multi_table_combiner.py @@ -20,7 +20,7 @@ class MultiTableCombiner(BaseModel): metadata_version: str = "1.0" metadata_dict: Dict[str, Any] = {} - relationships: List[Any] = [] + relationships: List[Relationship] = [] def check(self): """Do necessary checks: @@ -35,26 +35,24 @@ def check(self): if metadata_cnt != relationship_cnt + 1: raise MultiTableCombinerError("Number of tables should corresponds to relationships.") - # table name check - table_names_from_relationships = set() + table_names = set(self.metadata_dict.keys()) + relationship_parents = set(r.parent_table for r in self.relationships) + relationship_children = set(r.child_table for r in self.relationships) # each relationship's table must have metadata - table_names = list(self.metadata_dict.keys()) - for each_r in self.relationships: - if each_r.parent_table not in table_names: - raise MultiTableCombinerError( - f"Metadata of parent table {each_r.parent_table} is missing." - ) - if each_r.child_table not in table_names: - raise MultiTableCombinerError( - f"Metadata of child table {each_r.child_table} is missing." - ) - table_names_from_relationships.add(each_r.parent_table) - table_names_from_relationships.add(each_r.child_table) + if not table_names.issuperset(relationship_parents): + raise MultiTableCombinerError( + f"Relationships' parent table {relationship_parents - table_names} is missing." + ) + if not table_names.issuperset(relationship_children): + raise MultiTableCombinerError( + f"Relationships' child table {relationship_children - table_names} is missing." + ) # each table in metadata must in a relationship - for each_t in table_names: - if each_t not in table_names_from_relationships: - raise MultiTableCombinerError(f"Table {each_t} has not relationship.") + if not (relationship_parents + relationship_children).issuperset(table_names): + raise MultiTableCombinerError( + f"Table {table_names - (relationship_parents+relationship_children)} is missing in relationships." + ) logger.info("MultiTableCombiner check finished.") diff --git a/sdgx/data_models/relationship.py b/sdgx/data_models/relationship.py index e4db62cf..d83191f9 100644 --- a/sdgx/data_models/relationship.py +++ b/sdgx/data_models/relationship.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from sdgx.exceptions import RelationshipError +from sdgx.exceptions import RelationshipInitError class Relationship(BaseModel): @@ -26,4 +26,4 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if self.parent_table == self.child_table: - raise RelationshipError("child table and parent table cannot be the same") + raise RelationshipInitError("child table and parent table cannot be the same") diff --git a/sdgx/exceptions.py b/sdgx/exceptions.py index 73564442..7747f4e8 100644 --- a/sdgx/exceptions.py +++ b/sdgx/exceptions.py @@ -123,7 +123,7 @@ class MetadataInvalidError(DataModelError): ERROR_CODE = 9002 -class RelationshipError(DataModelError): +class RelationshipInitError(DataModelError): ERROR_CODE = 9003 diff --git a/tests/metadata/test_metadata.py b/tests/metadata/test_metadata.py index ebefc930..36dbf714 100644 --- a/tests/metadata/test_metadata.py +++ b/tests/metadata/test_metadata.py @@ -25,11 +25,12 @@ def test_metadata(metadata: Metadata): assert metadata.datetime_columns == metadata.get("datetime_columns") assert metadata.bool_columns == metadata.get("bool_columns") assert metadata.numeric_columns == metadata.get("numeric_columns") + assert metadata.set("a", 1) == metadata.get("a") assert metadata.model_dump_json() -def test_metadata_save_load(metadata: Metadata): - test_path = Path("metadata_path_test.json") +def test_metadata_save_load(metadata: Metadata, tmp_path: Path): + test_path = tmp_path / "metadata_path_test.json" metadata.save(test_path) # load from path new_meatadata = Metadata.load(test_path) From b090fe4e1bdae9bbda0805a8adddb0e405a7a602 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Wed, 27 Dec 2023 19:11:09 +0800 Subject: [PATCH 2/2] More on comments --- sdgx/data_models/metadata.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sdgx/data_models/metadata.py b/sdgx/data_models/metadata.py index 37f4258a..d8e57a7c 100644 --- a/sdgx/data_models/metadata.py +++ b/sdgx/data_models/metadata.py @@ -27,13 +27,15 @@ class Metadata(BaseModel): column_list(list[str]): list of the comlumn name in the table, other columns lists are used to store column information. """ - # for primary key - # compatible with single primary key or composite primary key primary_keys: List[str] = [] + """ + primary_keys is used to store single primary key or composite primary key + """ - # variables related to columns - # column_list is used to store all columns' name column_list: List[str] = [] + """" + column_list is used to store all columns' name + """ # other columns lists are used to store column information # here are 5 basic data types @@ -46,6 +48,9 @@ class Metadata(BaseModel): # version info metadata_version: str = "1.0" _extend: Dict[str, Any] = {} + """ + For extend information, use ``get`` and ``set`` + """ def get(self, key: str, default=None) -> Any: return getattr(self, key, getattr(self._extend, key, default))