This repository has been archived by the owner on Jun 15, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
json_generator.py
executable file
·334 lines (269 loc) · 9.98 KB
/
json_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#!/usr/bin/env python3
import sys
import json
import distutils.util
import argparse
"""
Script to generate a *_model.json file, given *.py
*.py file must have docstring at the header of the file first
otherwise, an empty *_model.json file will be generated
"""
"""
usage / options
./json_generator filename
"""
# Docstring format
# the docstring (if there is any) must contain all of these (no more no less)
"""
name: str,
run: str,
props: str,
doc: str,
source: str,
graph: str,
active: bool
"""
# Name this script was invoked with; used as the prefix of printed messages.
script_name = sys.argv[0]
# Flipped to True whenever validation or parsing reports a problem.
saw_error = False
# Character separating a key from its value inside the docstring.
jsonFieldDelimitor = ":"
# The fields (keys) the parser knows about.
# "source" should be required at the very least: json_combiner.py later
# relies on it to tell whether a model is new or not.
jsonFields = {"name", "run", "props", "doc", "source", "graph", "active"}
def validate_config():  # () -> None
    """
    Checks the configuration of this script

    Terminates the process with exit status 1 (after printing a message)
    when jsonFields is empty or when jsonFieldDelimitor is not exactly
    one character long. Returns None otherwise.
    """
    if len(jsonFields) == 0:
        script_output("jsonFields should be len greater than 0")
        # sys.exit rather than the bare exit() helper: the latter is only
        # injected by the site module and may be absent.
        sys.exit(1)
    if len(jsonFieldDelimitor) != 1:
        script_output("jsonFieldDelimitor must be len 1")
        sys.exit(1)
def script_output(message, withName=True):  # (str, bool) -> None
    """
    Print a message, prefixed with "<script name>: " unless told otherwise

    The prefix is only added when withName is exactly True; any other
    value prints the message as is.
    """
    if withName is not True:
        print(message)
        return
    print(script_name + ": " + message)
def has_docstring_quotes(line):  # (str) -> bool
    """
    Tell whether the given string contains a docstring delimiter, i.e.
    three double quotes in a row or three single quotes in a row
    Used for spotting the start and the end of a docstring
    """
    delimiters = ("'" * 3, '"' * 3)
    return any(delimiter in line for delimiter in delimiters)
def validate_docstring(content, filename, withOutput=True):
    """
    Function to validate the contents of a given docstring
    pass in filename for error messages
    params: ([str...], str, bool) -> bool

    content is the list of docstring lines, opening and closing quote
    lines included. It is valid when it has at least two lines and when
    neither its first nor its last line is, once stripped of surrounding
    whitespace, longer than a bare triple quote.

    Returns True when valid. Otherwise sets the global saw_error, prints
    the reason (only if withOutput is true) and returns False.
    """
    global saw_error
    MIN_DOCLEN = 2  # Min len of the docstring should be at least 2 lines
    # len of a """ or '''. The lines are stripped before being measured,
    # so the newline must NOT be counted here: counting it let one stray
    # character sit next to the quotes unnoticed.
    QUOTE_LEN = 3
    if len(content) < MIN_DOCLEN:
        if withOutput:
            script_output("docstring too short in " + filename)
        saw_error = True
        return False
    first_line, last_line = content[0].strip(), content[-1].strip()
    if len(first_line) > QUOTE_LEN or len(last_line) > QUOTE_LEN:
        if withOutput:
            script_output("docstring quotes should be in a line by itself")
            script_output("Problem in " + filename, False)
        saw_error = True
        return False
    return True
def validate_model(model_kv, key_set):
    """
    Decide whether the key/value pairs read from a docstring are usable

    model_kv is the parsed dict, key_set the set of unique keys that were
    recognised while building it. Checked in this order:
    1.) No known field at all
    2.) Missing fields
    3.) model_kv and jsonFields differing in size (e.g. duplicates that
        slipped through parsing)
    On the first failing check the explanation is printed, the global
    saw_error is set and False is returned. Returns True otherwise.
    """
    global saw_error
    if len(key_set) == 0:
        # The file had a docstring, but not the one we were expecting
        report = [
            ("Didn't find any known fields.", True),
            ("Fields should be in the following format:", False),
            ("<key>: <value>\n", False),
            ("Please make sure your FIRST docstring has all the fields:",
             False),
            (str(jsonFields), False),
        ]
    elif len(key_set) != len(jsonFields):
        report = [
            ("Missing required fields: " + str(jsonFields - key_set), True),
            ("Please make sure to put a space after delimitor", False),
            ("Current delimitor: " + repr(jsonFieldDelimitor[0]), False),
        ]
    elif len(model_kv) != len(jsonFields):
        report = [
            ("script error, model_kv is not the same len as jsonFields",
             True),
        ]
    else:
        return True

    for text, with_name in report:
        script_output(text, with_name)
    saw_error = True
    return False
def strip_docstring(content):
    """
    Drop the empty lines that directly follow the opening quote line and
    directly precede the closing quote line of a docstring
    Returns empty [] if content does not pass validate_docstring
    Returns content itself when there is nothing to drop
    """
    if not validate_docstring(content, "", False):
        return []

    last_inner = len(content) - 2
    head = 1
    while head < len(content) - 1 and not content[head]:
        head += 1
    tail = last_inner
    while tail > 0 and not content[tail]:
        tail -= 1

    # Nothing was skipped on either side: hand back the input untouched
    if head == 1 and tail == last_inner:
        return content

    # opening quotes + everything in between + closing quotes
    return [content[0], *content[head:tail + 1], content[-1]]
def clean_valString(valString):
    """
    Cleans up the value string that was processed
    removes leading and trailing whitespace and one trailing comma

    (str) -> str
    """
    # strip leading and trailing whitespace
    result = valString.strip()
    # remove trailing comma if any, together with the whitespace that sat
    # in front of it ("true ,": otherwise "true " is left behind and is no
    # longer recognised as a bool later on)
    if result.endswith(","):
        result = result[:-1].rstrip()
    return result
def convert_valString(valString):
    """
    Function to attempt to convert the string to its actual type
    I.E, "1" -> 1, "true" -> True, "null" -> None
    The catch all type is to string

    Conversions, tried in this order:
    1.) "null" (any case)                       -> None
    2.) anything int() accepts                  -> int
    3.) y / yes / t / true / on   (any case)    -> True
        n / no / f / false / off  (any case)    -> False
    4.) everything else                         -> valString unchanged

    The boolean spellings are the ones distutils.util.strtobool used to
    recognise. They are matched directly here because distutils is
    deprecated and was removed from the standard library in Python 3.12.
    """
    # "1" and "0" are not listed: int() has already claimed them by then
    true_words = ("y", "yes", "t", "true", "on")
    false_words = ("n", "no", "f", "false", "off")

    lowered = valString.lower()
    if lowered == "null":
        return None
    try:
        return int(valString)
    except (TypeError, ValueError):
        pass
    if lowered in true_words:
        return True
    if lowered in false_words:
        return False
    return valString
def parse_docstring(file_path):
    """
    parses the first docstring of the given model file
    returns a dict mapping each field of jsonFields to its converted value,
    or an empty dict {} when the docstring is missing or rejected
    general expected form: <key>: <value>

    Parsing logic
    1.) Read the lines from the first line holding docstring quotes up to
        the next one, stripping leading whitespace. Reaching the end of
        the file before that yields {}.
    2.) Check the docstring with validate_docstring (too short, quotes not
        on their own line) and stop if it is rejected. Then drop the
        empty lines at both ends with strip_docstring.
    3.) Loop through the lines between the quote lines, looking for the
        <key>: <value> pattern. If the delimitor followed by a whitespace
        is found, we got a potential key.
        Else, the line is part of the <value> of the last key detected
    3b.) Stop parsing if we found a duplicate key, an unexpected key, or
         text sitting before the first key
    4.) return the result if validate_model accepts it

    Every rejection in steps 2 to 4 sets the global saw_error.
    """
    global saw_error

    # Step 1:
    # Read everything from the docstring in first for processing
    docstring_content = []
    num_indicator = 0  # Number of lines with docstring quotes seen so far
    with open(file_path, 'r') as input_stream:
        for line in input_stream:
            if has_docstring_quotes(line):
                num_indicator += 1
            # We have entered a docstring
            if num_indicator > 0:
                # Prestrip whitespaces in front of line
                docstring_content.append(line.lstrip())
            if num_indicator >= 2:
                break
        else:
            # End of file reached before the docstring was closed
            return {}

    # If invalid docstring, return empty
    if not validate_docstring(docstring_content, file_path):
        return {}
    # Remove leading and trailing empty lines
    docstring_content = strip_docstring(docstring_content)

    # Step 2: Now we process the docstring
    model_kv = {}  # model's key val pair for the json
    found_set = set()
    keyString, valueString = "", ""
    # First and last entries are the quote lines themselves
    for line in docstring_content[1:-1]:
        delimitorIndex = line.find(jsonFieldDelimitor)
        starts_field = (delimitorIndex != -1 and
                        delimitorIndex + 1 < len(line) and
                        line[delimitorIndex + 1].isspace())
        if not starts_field:
            # Continuation of the value of the last key detected
            valueString += line
            continue

        lineKey = line[:delimitorIndex]
        if lineKey in found_set:
            script_output("FOUND DUPLICATE FIELD: " + lineKey)
            script_output("in " + file_path, False)
            saw_error = True
            return {}
        if lineKey not in jsonFields:
            # stop parsing since we found a rogue key
            script_output("UNKNOWN KEY " + repr(lineKey))
            saw_error = True
            return {}

        if keyString:
            # A new key closes the value of the previous one
            model_kv[keyString] = \
                convert_valString(clean_valString(valueString))
        elif valueString:
            # Text with no key to belong to. It used to be stored under
            # an empty key and only surfaced later as a misleading
            # "script error"; reject it here with an accurate message.
            script_output("FOUND TEXT BEFORE THE FIRST FIELD")
            script_output("in " + file_path, False)
            saw_error = True
            return {}
        found_set.add(lineKey)
        keyString = lineKey
        valueString = line[delimitorIndex + len(jsonFieldDelimitor):]

    # Last key
    if keyString:
        model_kv[keyString] = convert_valString(clean_valString(valueString))

    if not validate_model(model_kv, found_set):
        return {}

    # Step 3: return the finished product to our caller
    return model_kv
def generate_json(model_kv):
    """
    Generates json and prints to stdout
    model_kv is generated by parse_docstring()
    it contains the key value pair of each field
    (dict) -> None

    The output has its keys sorted and is indented by 4 spaces.
    Terminates the process with exit status 1 (after printing a message)
    when model_kv is not a dict.
    """
    if not isinstance(model_kv, dict):
        script_output("generate_json(dict), check function def")
        # sys.exit rather than the bare exit() helper: the latter is only
        # injected by the site module and may be absent.
        sys.exit(1)
    print(json.dumps(model_kv, sort_keys=True, indent=4))
def main():
    """
    Entry point: json_generator.py filename

    Checks the script configuration, parses the docstring of the given
    file and prints the resulting json to stdout.
    Terminates with exit status 1 if any error was flagged on the way
    (the json, possibly empty, has been printed by then).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("filename")
    args = arg_parser.parse_args()

    validate_config()
    generate_json(parse_docstring(args.filename))
    if saw_error:
        # sys.exit rather than the bare exit() helper: the latter is only
        # injected by the site module and may be absent.
        sys.exit(1)


if __name__ == "__main__":
    main()