-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathremodeler_validator.py
211 lines (181 loc) · 9.65 KB
/
remodeler_validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
""" Validator for remodeler input files. """
import jsonschema
from copy import deepcopy
from hed.tools.remodeling.operations.valid_operations import valid_operations
class RemodelerValidator:
    """ Validator for remodeler input files.

    A single JSON schema is assembled from the class-level templates and the PARAMS of every
    entry in valid_operations. Operation lists are first checked against that schema; only when
    the schema check is clean are the operation-specific validate_input_data checks run.
    """

    # Message templates, keyed first by the depth of the error path in the validated list
    # ("0" = whole list, "1" = one operation dictionary, "2" = a field of an operation,
    # "more" = anything inside the parameters) and then by the failing JSON schema keyword.
    MESSAGE_STRINGS = {
        "0": {
            "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.",
            "type": "Operations must be contained in a list or array. This is also true for a single operation."
        },
        "1": {
            "type": "Each operation must be defined in a dictionary: {instance} is not a dictionary object.",
            "required": "Operation dictionary {operation_index} is missing '{missing_value}'. " +
                        "Every operation dictionary must specify the type of operation, " +
                        "a description, and the operation parameters.",
            "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field " +
                                    "'{added_property}'. Every operation dictionary must specify the type " +
                                    "of operation, a description, and the operation parameters."
        },
        "2": {
            "type": "Operation {operation_index}: {instance} is not a {validator_value}. " +
                    "{operation_field} should be of type {validator_value}.",
            "enum": "{instance} is not a known remodeler operation. See the documentation for valid operations.",
            "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} " +
                        "is a required parameter of {operation_name}.",
            "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} " +
                                    "contain an unexpected field '{added_property}'.",
            "dependentRequired": "Operation {operation_index}: The parameter {missing_value} is missing: " +
                                 "{missing_value} is a required parameter of {operation_name} " +
                                 "when {dependent_on} is specified."
        },
        "more": {
            "type": "Operation {operation_index}: The value of {parameter_path} in the {operation_name} operation " +
                    "should be {validator_value}. {instance} is not a {validator_value}.",
            "minItems": "Operation {operation_index}: The list in {parameter_path} in the {operation_name} " +
                        "operation should have at least {validator_value} item(s).",
            "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. " +
                        "{missing_value} is a required parameter of {parameter_path}.",
            "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} " +
                                    "contain an unexpected field '{added_property}'.",
            "enum": "Operation {operation_index}: Operation parameter {parameter_path} in the {operation_name} " +
                    "operation contains and unexpected value. Value should be one of {validator_value}.",
            "uniqueItems": "Operation {operation_index}: The list in {parameter_path} in the {operation_name} " +
                           "operation should only contain unique items.",
            "minProperties": "Operation {operation_index}: The dictionary in {parameter_path} in the " +
                             "{operation_name} operation should have at least {validator_value} key(s)."
        }
    }

    # Schema for the outer container: a non-empty array whose item schema is filled in later.
    BASE_ARRAY = {
        "type": "array",
        "items": {},
        "minItems": 1
    }

    # Schema for one operation dictionary; "enum" and "allOf" are filled in per operation.
    OPERATION_DICT = {
        "type": "object",
        "required": [
            "operation",
            "description",
            "parameters"
        ],
        "additionalProperties": False,
        "properties": {
            "operation": {
                "type": "string",
                "enum": [],
                "default": "convert_columns"
            },
            "description": {
                "type": "string"
            },
            "parameters": {
                "type": "object",
                "properties": {}
            }
        },
        "allOf": []
    }

    # Conditional template: when "operation" equals the const, "parameters" must match the
    # schema placed under "then". One copy is added to "allOf" for each valid operation.
    PARAMETER_SPECIFICATION_TEMPLATE = {
        "if": {
            "properties": {
                "operation": {
                    "const": ""
                }
            },
            "required": [
                "operation"
            ]
        },
        "then": {
            "properties": {
                "parameters": {}
            }
        }
    }

    def __init__(self):
        """ Constructor for remodeler Validator. """
        # The compiled json schema against which remodeler files are validated.
        self.schema = self._construct_schema()
        # The instantiated json schema validator.
        self.validator = jsonschema.Draft202012Validator(self.schema)

    def validate(self, operations):
        """ Validate remodeler operations against the json schema specification and specific op requirements.

        Parameters:
            operations (list): List of operation dictionaries to run through the remodeler.

        Returns:
            list: List with the error messages for errors identified by the validator.

        Note:
            - If the schema check finds any error, only the schema errors are returned; the
              operation-specific checks are run only on schema-valid input.
        """
        schema_errors = sorted(self.validator.iter_errors(operations), key=lambda e: e.path)
        if schema_errors:
            return [self._parse_message(error, operations) for error in schema_errors]

        # The schema check passed, so every entry has a known "operation" and a "parameters" dict.
        list_of_error_strings = []
        for index, operation in enumerate(operations, start=1):
            name = operation["operation"]
            for error_string in valid_operations[name].validate_input_data(operation["parameters"]):
                list_of_error_strings.append(f"Operation {index} ({name}): {error_string}")
        return list_of_error_strings

    def _parse_message(self, error, operations):
        """ Return a user-friendly error message based on the jsonschema validation error.

        Parameters:
            error (ValidationError): A validation error from jsonschema validator.
            operations (list): The operations that were validated.

        Returns:
            str: The formatted message, or the raw jsonschema message (prefixed with the
                 operation number when it is known) if no template applies.

        Note:
            - json schema error does not contain all necessary information to return a
              proper error message so, we also take some information directly from the operations
              that led to the error.
            - all necessary information is gathered into an error dict, message strings are predefined
              in a dictionary which are formatted with additional information.
        """
        # Copy the attributes: vars() returns the error's own __dict__, and the derived
        # fields added below should not be written back onto the caller's error object.
        error_dict = dict(vars(error))
        path = list(error_dict["path"])
        level = len(path) if len(path) <= 2 else "more"
        error_dict.update(self._path_context(path, operations))

        attr_type = str(error_dict["validator"])

        # the missing value with required elements, or the wrong additional value is not known to the
        # validation error object
        # this is a known issue of jsonschema: https://github.com/python-jsonschema/jsonschema/issues/119
        # for now the simplest thing seems to be to extract it from the error message
        quoted = str(error_dict.get("message", "")).split("'")[1::2]
        if attr_type == 'required' and quoted:
            error_dict["missing_value"] = quoted[0]
        elif attr_type == 'additionalProperties' and quoted:
            error_dict["added_property"] = quoted[0]
        elif attr_type == 'dependentRequired':
            pair = self._dependent_required_pair(error_dict, quoted)
            if pair is not None:
                error_dict["missing_value"], error_dict["dependent_on"] = pair

        template = self.MESSAGE_STRINGS[str(level)].get(attr_type)
        if template is not None:
            try:
                return template.format(**error_dict)
            except (KeyError, IndexError):
                # A placeholder could not be resolved; use the raw message below.
                pass

        # No template for this keyword at this level (or it could not be filled in):
        # report the jsonschema message rather than raising from inside the validator.
        prefix = f"Operation {error_dict['operation_index']}: " if "operation_index" in error_dict else ""
        return f"{prefix}{error_dict.get('message', '')}"

    @staticmethod
    def _path_context(path, operations):
        """ Return the template fields that can be derived from the error path.

        Parameters:
            path (list): The error path as a list of indices and keys.
            operations (list): The operations that were validated.

        Returns:
            dict: Any of operation_index, operation_field, operation_name and parameter_path
                  that could be determined. Each field is computed independently.
        """
        context = {}
        if path and isinstance(path[0], int):
            context["operation_index"] = path[0] + 1  # messages count operations from 1
        if len(path) > 1 and isinstance(path[1], str):
            context["operation_field"] = path[1].capitalize()
        try:
            context["operation_name"] = operations[int(path[0])]['operation']
        except (IndexError, TypeError, KeyError, ValueError):
            pass
        # everything except the first two values reversed, with list positions made 1-based
        context["parameter_path"] = " ".join(
            f"item {part + 1}" if isinstance(part, int) else str(part)
            for part in reversed(path[2:]))
        return context

    @staticmethod
    def _dependent_required_pair(error_dict, quoted):
        """ Return (missing parameter, parameter that requires it) for a dependentRequired error.

        Parameters:
            error_dict (dict): The attributes of the validation error.
            quoted (list): The single-quoted tokens found in the error message.

        Returns:
            tuple or None: The pair, or None if it could not be determined.

        Note:
            - In a dependentRequired schema the key is the parameter whose presence triggers
              the requirement and the value lists the parameters that are then required.
        """
        # Assumes the message quotes the missing dependency first and its trigger second,
        # e.g. "'b' is a dependency of 'a'".
        if len(quoted) >= 2:
            return quoted[0], quoted[1]

        # Otherwise infer the pair from the instance: a trigger that is present with a
        # dependency that is absent.
        instance = error_dict.get("instance")
        present = instance if isinstance(instance, dict) else {}
        for trigger, needed in error_dict["validator_value"].items():
            if trigger in present:
                for name in needed:
                    if name not in present:
                        return name, trigger
        return None

    def _construct_schema(self):
        """ Return a schema specialized to the operations.

        Returns:
            dict: JSON schema for an array of operation dictionaries, with one conditional
                  parameter specification per entry in valid_operations.
        """
        schema = deepcopy(self.BASE_ARRAY)
        operation_schema = deepcopy(self.OPERATION_DICT)
        for name, operation_class in valid_operations.items():
            operation_schema["properties"]["operation"]["enum"].append(name)
            specification = deepcopy(self.PARAMETER_SPECIFICATION_TEMPLATE)
            specification["if"]["properties"]["operation"]["const"] = name
            # Copy PARAMS so the schema never shares structure with the operation class.
            specification["then"]["properties"]["parameters"] = deepcopy(operation_class.PARAMS)
            operation_schema["allOf"].append(specification)
        schema["items"] = operation_schema
        return schema