Skip to content

Commit

Permalink
Merge pull request #1526 from Sage-Bionetworks/FDS-2506-type-val
Browse files Browse the repository at this point in the history
[FDS-2506] Fix type Validation
  • Loading branch information
andrewelamb authored Oct 30, 2024
2 parents c69ec88 + 5fcaac3 commit 59889b6
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 31 deletions.
51 changes: 33 additions & 18 deletions schematic/models/validate_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,23 +1104,40 @@ def type_validation(
manifest_col: pd.Series,
) -> tuple[list[list[str]], list[list[str]]]:
"""
Purpose:
Check if values for a given manifest attribue are the same type
Check if values for a given manifest attribute are the same type
specified in val_rule.
Input:
- val_rule: str, Validation rule, specifying input type, either
Args:
val_rule (str): Validation rule, specifying input type, either
'float', 'int', 'num', 'str'
- manifest_col: pd.Series, column for a given
manifest_col (pd.Series): column for a given
attribute in the manifest
Raises:
ValueError: If after splitting the validation rule by spaces,
there are no components left
ValueError: If after splitting the validation rule by spaces,
there are more than two components left
ValueError: If after splitting the validation rule by spaces,
the first component is not one of 'float', 'int', 'num', 'str'
Returns:
-This function will return errors when the user input value
does not match schema specifications.
logger.error or logger.warning.
Errors: list[str] Error details for further storage.
warnings: list[str] Warning details for further storage.
TODO:
Convert all inputs to .lower() just to prevent any entry errors.
tuple[list[list[str]], list[list[str]]]: _description_
"""
val_rule_components = val_rule.split(" ")
if len(val_rule_components) == 0:
raise ValueError("val_rule must contain at least one component.")
if len(val_rule_components) > 2:
raise ValueError("val_rule must contain no more than two components.")
val_rule_type = val_rule_components[0]
if val_rule_type not in ["float", "int", "num", "str"]:
raise ValueError(
(
f"val_rule first component: {val_rule_type} must be one of "
"['float', 'int', 'num', 'str']"
)
)

specified_type = {
"num": (int, np.int64, float),
"int": (int, np.int64),
Expand All @@ -1132,15 +1149,15 @@ def type_validation(
warnings: list[list[str]] = []

# num indicates either a float or int.
if val_rule == "num":
if val_rule_type == "num":
for i, value in enumerate(manifest_col):
entry_has_value = self.get_entry_has_value(
entry=value,
node_display_name=manifest_col.name,
)
if (
bool(value)
and not isinstance(value, specified_type[val_rule])
and not isinstance(value, specified_type[val_rule_type])
and entry_has_value
):
vr_errors, vr_warnings = GenerateError.generate_type_error(
Expand All @@ -1152,18 +1169,17 @@ def type_validation(
)
if vr_errors:
errors.append(vr_errors)
# It seems impossible to get warnings with type rules
if vr_warnings:
warnings.append(vr_warnings)
elif val_rule in ["int", "float", "str"]:
elif val_rule_type in ["int", "float", "str"]:
for i, value in enumerate(manifest_col):
entry_has_value = self.get_entry_has_value(
entry=value,
node_display_name=manifest_col.name,
)
if (
bool(value)
and not isinstance(value, specified_type[val_rule])
and not isinstance(value, specified_type[val_rule_type])
and entry_has_value
):
vr_errors, vr_warnings = GenerateError.generate_type_error(
Expand All @@ -1175,7 +1191,6 @@ def type_validation(
)
if vr_errors:
errors.append(vr_errors)
# It seems impossible to get warnings with type rules
if vr_warnings:
warnings.append(vr_warnings)
return errors, warnings
Expand Down
8 changes: 4 additions & 4 deletions tests/data/example.model.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f]
Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f]
Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f]
Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$
Check Num,,,,,TRUE,DataProperty,,,num
Check Float,,,,,TRUE,DataProperty,,,float
Check Int,,,,,TRUE,DataProperty,,,int
Check String,,,,,TRUE,DataProperty,,,str
Check Num,,,,,TRUE,DataProperty,,,num error
Check Float,,,,,TRUE,DataProperty,,,float error
Check Int,,,,,TRUE,DataProperty,,,int error
Check String,,,,,TRUE,DataProperty,,,str error
Check URL,,,,,TRUE,DataProperty,,,url
Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set
Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set
Expand Down
8 changes: 4 additions & 4 deletions tests/data/example.model.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@
"sms:displayName": "Check Num",
"sms:required": "sms:true",
"sms:validationRules": [
"num"
"num error"
]
},
{
Expand All @@ -1277,7 +1277,7 @@
"sms:displayName": "Check Float",
"sms:required": "sms:true",
"sms:validationRules": [
"float"
"float error"
]
},
{
Expand All @@ -1296,7 +1296,7 @@
"sms:displayName": "Check Int",
"sms:required": "sms:true",
"sms:validationRules": [
"int"
"int error"
]
},
{
Expand All @@ -1315,7 +1315,7 @@
"sms:displayName": "Check String",
"sms:required": "sms:true",
"sms:validationRules": [
"str"
"str error"
]
},
{
Expand Down
152 changes: 147 additions & 5 deletions tests/unit/test_validate_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,104 @@ def test_generate_filename_error_unsupported_error_type(
error_type="unsupported error type",
)

@pytest.mark.parametrize(
"input_rule, input_num, input_name, input_entry, expected_error, expected_warning",
[
(
"x",
0,
"Patient",
"y",
[],
[
0,
"Patient",
"On row 0 the attribute Patient does not contain the proper value type x.",
"y",
],
),
(
"x warning",
0,
"Patient",
"y",
[],
[
0,
"Patient",
"On row 0 the attribute Patient does not contain the proper value type x.",
"y",
],
),
(
"x error",
0,
"Patient",
"y",
[
0,
"Patient",
"On row 0 the attribute Patient does not contain the proper value type x.",
"y",
],
[],
),
],
)
def test_generate_type_error(
self,
dmge: DataModelGraphExplorer,
input_rule: str,
input_num: int,
input_name: str,
input_entry: str,
expected_error: list[str],
expected_warning: list[str],
) -> None:
"""Testing for GenerateError.generate_type_error"""
error, warning = GenerateError.generate_type_error(
val_rule=input_rule,
row_num=input_num,
attribute_name=input_name,
invalid_entry=input_entry,
dmge=dmge,
)
import logging

logging.warning(error)
logging.warning(warning)
assert error == expected_error
assert warning == expected_warning

@pytest.mark.parametrize(
"input_rule, input_num, input_name, input_entry, exception",
[
# Empty rule or entry causes a key error
("", 0, "x", "x", KeyError),
("x", 0, "x", "", KeyError),
# Empty attribute causes an index error
("x", 0, "", "x", IndexError),
],
)
def test_generate_type_error_exceptions(
self,
dmge: DataModelGraphExplorer,
input_rule: str,
input_num: int,
input_name: str,
input_entry: str,
exception: Exception,
) -> None:
"""Testing for GenerateError.generate_type_error"""
with pytest.raises(exception):
GenerateError.generate_type_error(
val_rule=input_rule,
row_num=input_num,
attribute_name=input_name,
invalid_entry=input_entry,
dmge=dmge,
)


class TestValidateAttributeObject:
"""Testing for ValidateAttribute class with all Synapse calls mocked"""
Expand Down Expand Up @@ -1792,15 +1890,19 @@ def test_type_validation_passing(
"input_column, rule",
[
(Series([1], name="Check String"), "str"),
(Series([1], name="Check String"), "str error"),
(Series(["a"], name="Check Num"), "num"),
(Series(["a"], name="Check Num"), "num error"),
(Series(["20"], name="Check Num"), "num"),
(Series([1.1], name="Check Int"), "int"),
(Series(["a"], name="Check Int"), "int"),
(Series(["a"], name="Check Int"), "int error"),
(Series([1], name="Check Float"), "float"),
(Series(["a"], name="Check Float"), "float"),
(Series(["a"], name="Check Float"), "float error"),
],
)
def test_type_validation_failing(
def test_type_validation_errors(
self, va_obj: ValidateAttribute, input_column: Series, rule: str
) -> None:
"""
Expand All @@ -1814,20 +1916,60 @@ def test_type_validation_failing(
@pytest.mark.parametrize(
"input_column, rule",
[
(Series([1], name="Check String"), "str error"),
(Series([1], name="Check String"), "str warning"),
(Series(["a"], name="Check Num"), "num warning"),
(Series(["a"], name="Check Int"), "int warning"),
(Series(["a"], name="Check Float"), "float warning"),
],
)
def test_type_validation_does_not_work(
def test_type_validation_warnings(
self, va_obj: ValidateAttribute, input_column: Series, rule: str
) -> None:
"""
This tests ValidateAttribute.type_validation
This test shows that the msg level parameter doesn't work
This test shows failing examples using the type rule
"""
errors, warnings = va_obj.type_validation(rule, input_column)
assert len(errors) == 0
assert len(warnings) == 0
assert len(warnings) == 1

@pytest.mark.parametrize(
"input_column, rule, exception, msg",
[
(
Series([1], name="Check String"),
"",
ValueError,
"val_rule first component: must be one of",
),
(
Series([1], name="Check String"),
"x",
ValueError,
"val_rule first component: x must be one of",
),
(
Series([1], name="Check String"),
"x x x",
ValueError,
"val_rule must contain no more than two components.",
),
],
)
def test_type_validation_exceptions(
self,
va_obj: ValidateAttribute,
input_column: Series,
rule: str,
exception: Exception,
msg: str,
) -> None:
"""
This tests ValidateAttribute.type_validation
This test shows failing examples using the type rule
"""
with pytest.raises(exception, match=msg):
va_obj.type_validation(rule, input_column)

################
# url_validation
Expand Down

0 comments on commit 59889b6

Please sign in to comment.