-
Notifications
You must be signed in to change notification settings - Fork 2
/
parse.py
executable file
·89 lines (71 loc) · 2.73 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
import re, os, platform
from data import Data
import sys
import pyperclip
# Command-line switches:
#   -q  wrap every utterance in double quotes
#   -c  append a trailing comma to every utterance
use_quotes = '-q' in sys.argv
append_commas = '-c' in sys.argv

# Splits on whitespace that is NOT inside a parenthesised group, so that a
# group such as "(good morning|hello)" survives as a single piece.
# see https://stackoverflow.com/questions/9644784/splitting-on-spaces-except-between-certain-characters
PIECE_SPLITTER = re.compile(r"\s+(?=[^()]*(?:\(|$))")


def substitute_variables(sentence: str, variables: dict) -> str:
    """Return *sentence* with every ``$name`` replaced by ``variables[name]``.

    Replacements are applied in the mapping's iteration order, one key at a
    time, so a value inserted by an earlier key may itself contain a
    ``$name`` that a later key expands.
    """
    for key in variables:
        sentence = sentence.replace('$' + key, variables[key])
    return sentence


def expand_sentence(sentence: str) -> list:
    """Expand one template sentence into every utterance it describes.

    Template syntax, one whitespace-separated piece at a time:

    * ``(a|b|c)`` -- alternation: one branch per alternative
      (alternatives may contain spaces).
    * ``word?``   -- optional word: one branch with it, one without.
    * anything else is copied verbatim into every branch.

    Branches that include an optional word are listed before the branches
    that omit it. Empty tokens (e.g. the empty alternative in ``(please|)``)
    are left out when the words are joined, and utterances that end up
    completely empty are dropped.

    Returns:
        A list of utterance strings; it may contain duplicates.
    """
    utterances = [[]]
    for piece in PIECE_SPLITTER.split(sentence):
        if not piece:
            # Leading/trailing whitespace (or an empty sentence) makes
            # re.split emit '' -- indexing piece[0] on it would raise.
            continue
        if piece[0] == '(' and piece[-1] == ')':
            fragments = piece[1:-1].split('|')
            # Build fresh lists so the branches never share state.
            utterances = [
                tokens + [fragment]
                for tokens in utterances
                for fragment in fragments
            ]
        elif piece[-1] == '?':
            word = piece[:-1]
            # Branches WITH the optional word first, then the untouched
            # originals as the branches WITHOUT it.
            utterances = [tokens + [word] for tokens in utterances] + utterances
        else:
            for tokens in utterances:
                tokens.append(piece)
    joined = (' '.join(token for token in tokens if token) for tokens in utterances)
    return [utterance for utterance in joined if utterance]


def build_utterances(sentences, variables: dict) -> list:
    """Expand an intent's sentence template(s) into unique utterances.

    Args:
        sentences: A single template string or an iterable of them.
        variables: Mapping of variable name to replacement text, applied
            to each template before it is expanded.

    Returns:
        The utterances of all templates, de-duplicated, in first-seen order.
    """
    if isinstance(sentences, str):
        sentences = [sentences]
    all_utterances = []
    for sentence in sentences:
        all_utterances.extend(
            expand_sentence(substitute_variables(sentence, variables))
        )
    # dict keys keep insertion order, giving a stable, repeatable result
    # (a plain set would shuffle the output between runs).
    return list(dict.fromkeys(all_utterances))


def parse_choice(raw: str, limit: int) -> int:
    """Convert the user's 1-based menu answer to a 0-based index.

    Raises:
        ValueError: If *raw* is not an integer in the range 1..limit.
            Without this check ``0`` or a negative number would silently
            pick an intent counted from the end of the list.
    """
    problem = "Please enter a number between 1 and " + str(limit)
    try:
        number = int(raw)
    except ValueError:
        raise ValueError(problem) from None
    if not 1 <= number <= limit:
        raise ValueError(problem)
    return number - 1


def format_utterance(utterance: str, quoted: bool = False, with_comma: bool = False) -> str:
    """Return *utterance*, optionally double-quoted and/or comma-terminated."""
    quote = '"' if quoted else ''
    comma = ',' if with_comma else ''
    return quote + utterance + quote + comma


def main():
    """Ask for an intent, print its utterances and copy them to the clipboard."""
    data = Data()
    intents = data.intents
    variables = data.variables
    intent_list = list(intents.keys())
    limit = len(intents)
    if not intent_list:
        sys.exit("No intents are defined")

    prompt = "Pick an intent [1-" + str(limit) + "]\n" + ''.join(
        str(number) + ") " + name + "\n"
        for number, name in enumerate(intent_list, 1)
    )
    try:
        choice = parse_choice(input(prompt), limit)
    except ValueError as err:
        sys.exit(str(err))

    unique_list = build_utterances(intents[intent_list[choice]], variables)

    print ('***********************************')
    print ('***********************************')
    # Report the number of utterances actually emitted (after de-duplication).
    print ('Creating ' + str(len(unique_list)) + ' utterances')
    print ('***********************************')
    print ('***********************************')

    formatted = [
        format_utterance(utterance, use_quotes, append_commas)
        for utterance in unique_list
    ]
    for fmt_utterance in formatted:
        print (fmt_utterance)
    # all the utterances will be inserted
    # into your clipboard
    pyperclip.copy(''.join(fmt_utterance + "\r" for fmt_utterance in formatted))


if __name__ == "__main__":
    main()