-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCYK_Parser.py
136 lines (112 loc) · 4.35 KB
/
CYK_Parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
'''
Code by Abdulhadi Ali Alaraj
Dependencies:
- Pandas
This program takes in a context-free grammar made in
chomsky normal form and gives you two options, you
can either recursively generate a random string from
the grammar or you can input a string and check if it
can be generated by the grammar using the CYK Parser
Bracket Grammar in CNF:
S → SS | AC | AB
C → SB
A → (
B → )
Input example:
Enter an input of form S or type 'end' to stop: S
Enter an input of form AB or a single terminal: SS
Enter an input of form S or type 'end' to stop: S
Enter an input of form AB or a single terminal: AC
Enter an input of form S or type 'end' to stop: S
Enter an input of form AB or a single terminal: AB
Enter an input of form S or type 'end' to stop: C
Enter an input of form AB or a single terminal: SB
Enter an input of form S or type 'end' to stop: A
Enter an input of form AB or a single terminal: (
Enter an input of form S or type 'end' to stop: B
Enter an input of form AB or a single terminal: )
Enter an input of form S or type 'end' to stop: end
'''
import random
import pandas as pd
def generate_string(start_symbol):
"""
Generate a random string from the context-free grammar, starting from the given start symbol.
"""
#If the start symbol is a terminal, return it
if start_symbol not in [rule[0] for rule in grammar]:
return start_symbol
#Otherwise, choose a random production rule for the start symbol
rules = [rule[1] for rule in grammar if rule[0] == start_symbol]
chosen_rule = random.choice(rules)
#Recursively generate strings for each symbol in the chosen production rule
return ''.join([generate_string(symbol) for symbol in chosen_rule])
def cyk_parser(grammar, input_str):
n = len(input_str)
# initialize empty table of size n x n x |grammar|
table = [[set() for _ in range(n)] for _ in range(n)]
# fill in the base case of the table
for i in range(n):
for (nt, t) in grammar:
if input_str[i] == t:
table[i][i].add(nt)
# fill in the rest of the table
for l in range(2, n+1):
for i in range(n-l+1):
j = i + l - 1
for k in range(i, j):
for (nt, rhs) in grammar:
if len(rhs) == 2:
B, C = rhs
if B in table[i][k] and C in table[k+1][j]:
table[i][j].add(nt)
#remove empty sets and replace with zeros (visual preference)
for i in range(len(table)):
for j in range(len(table[0])):
if table[i][j] == set():
table[i][j] = '0'
#output CYK Table as a pandas dataframe because it looks better
df = pd.DataFrame(table)
print(df)
#check if the start symbol is in the last cell of the table
return 'S' in table[0][n-1]
grammar = set()
print("Enter the grammar in Chomsky Normal Form\n")
# Define the context-free grammar as a set of tuples
while True:
s_input = input("Enter an input of form S or type 'end' to stop: ")
if s_input == 'end':
break
ab_input = input("Enter an input of form AB or a single terminal: ")
if len(ab_input) == 1:
grammar.add((s_input, (ab_input)))
else:
ab_list = tuple(ab_input)
if len(ab_list) == 2:
grammar.add((s_input, ab_list))
else:
print("Invalid input, please try again.")
print("\nGrammar: ")
for item in grammar:
if isinstance(item[1], tuple):
print(item[0] + "->" + "".join(item[1]))
else:
print(item[0] + "->" + item[1])
print()
while True:
decide = int(input("Enter 1 for CYK, 2 for String Generation, and 0 to exit: "))
if decide == 1:
input_str = input("Enter a string: ")
if input_str == '0':
break
result = cyk_parser(grammar, input_str)
if result:
print(f"The string {input_str} is in the grammar.\n")
else:
print(f"The string {input_str} is not in the grammar.\n")
if decide == 2:
# Generate a random string starting from the 'S' symbol
print("This string is generated by the grammar: ",generate_string('S'))
if decide == 0:
print("Exiting Program...")
break