-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrudilexanalyzer.py
44 lines (33 loc) · 1.09 KB
/
rudilexanalyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import re
# Grammar token patterns.
#   index 0: "~" followed by a (possibly empty) run of characters other than
#            "%", "~", newline and space.
#   index 1: "%" followed by one word character or "+", then any run of word
#            characters or "?".
pattern_list = [
    r"~([^%~\n ]*)",
    r"%([\w+][?\w]*)",
]
# Combined matcher. The "%" alternative comes first, so it is capture group 1
# and the "~" alternative is capture group 2.
token_regex = re.compile("|".join((pattern_list[1], pattern_list[0])))
def analyze_grammar(filepath):
    """Read a grammar file and return one token list per non-blank line.

    Every non-blank line is scanned with the module-level ``token_regex``.
    Tokens are kept in order of appearance: a ``%`` match is stored as
    ``"%" + captured text`` and a ``~`` match as ``"~" + captured text``.
    Matches whose captured text is empty (e.g. a lone ``~``) are dropped.

    Args:
        filepath: Path of the grammar file to read.

    Returns:
        A list of lists of strings, one inner list per non-blank line. A line
        that contains text but no token yields an empty inner list.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    result = []
    with open(filepath, 'r') as reader:
        for line in reader:
            # Skip blank lines in a single pass. Whitespace-only lines are
            # treated as blank too, so they no longer produce empty rules.
            if not line.strip():
                continue
            tokens_list = []
            # findall yields (percent_group, tilde_group) tuples; at most one
            # of the two groups is non-empty for any given match.
            for percent_text, tilde_text in token_regex.findall(line):
                if percent_text:
                    tokens_list.append("%" + percent_text)
                elif tilde_text:
                    tokens_list.append("~" + tilde_text)
            # Each scanned line becomes one production rule.
            result.append(tokens_list)
    return result
def print_grammar(grammar):
    """Print each rule of a grammar on its own line.

    The first token of a rule is followed by ``" := "`` and the remaining
    tokens are appended, all joined by single spaces. The input rules are not
    modified.

    Args:
        grammar: Iterable of rules, each an iterable of token strings, such as
            the value returned by ``analyze_grammar``.

    Returns:
        None. The formatted rules are written to standard output.
    """
    for rule in grammar:
        tokens = list(rule)
        if not tokens:
            # An empty rule has no left-hand token to print; skip it instead
            # of raising IndexError.
            continue
        head, *rest = tokens
        print(" ".join([head + " := ", *rest]))
# Script driver: parse "grammar_comment.txt" (resolved relative to the current
# working directory) and print the resulting rules. These statements run
# whenever the module is executed or imported.
grammar_comment = analyze_grammar("grammar_comment.txt")
print_grammar(grammar_comment)