-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
209 lines (180 loc) · 8.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import openai
import os
"""
Replit Instructions:
1) To run this program, save your openai API key as a system environment
variable using the key "OPENAI_API_KEY". If using replit, system
environment variables can be accessed and set in the "Secrets" Tool.
"""
def ai_location_search(pdf_txt):
    '''
    Uses gpt to search inspection report text for location data.

    If the fixed company-address marker is present, the text after its first
    occurrence is split into lines and a bounded window of those lines
    (indices 6 through 16) is passed one at a time to ai_classify. Every line
    classified as a location is collected. If the marker is absent, or no line
    in the window is classified as a location, the whole text is handed to
    ai_analyze instead.
    WARNING: If ai_analyze is called, results may be unpredictable.
    Input: pdf_text string
    Return: Locations (each followed by a newline) when found by ai_classify,
            otherwise whatever string ai_analyze returns
    '''
    # Fixed text marker that precedes the location section. Replace as needed.
    split_flag = ("TARDIS Quality Inspectors LLC\n"
                  "2289 Farington Ave\n"
                  "Atlanta, Georgia\n"
                  "98922")
    first_line = 6     # skip the first few lines after the marker (indices 0-5)
    search_depth = 17  # hard cap on the window: index 17 and beyond are never classified

    if split_flag in pdf_txt:
        # Text following the first occurrence of the marker, one entry per line
        lines = pdf_txt.split(split_flag)[1].split("\n")
        # Slicing clamps to the lines that actually exist, so a report with
        # fewer than search_depth lines after the marker cannot raise IndexError.
        candidates = lines[first_line:search_depth]
        matches = [line for line in candidates if ai_classify(line)]
        if matches:
            # Multiple locations are separated (and terminated) by '\n'
            return "".join(line + "\n" for line in matches)

    # Alternate format or no location found: search the entire document
    return ai_analyze(pdf_txt)
def ai_classify(line):
    """
    Use openai gpt to determine whether a string looks like a location.

    Blank or whitespace-only input is rejected locally without contacting
    the API, since an empty line cannot be a location and every request
    costs a round trip.
    Input: line string
    Return: Boolean True if gpt's reply contains "yes", otherwise False
    """
    # Nothing to classify: avoid spending an API call on an empty line
    if not line or not line.strip():
        return False

    #load openai api_key from env variables
    openai.api_key = os.getenv("OPENAI_API_KEY")

    #examples of data to match for LLM binary classification prompt
    location_examples = (
        "Gallifrey - Citadel; Time Rotor Room; Grid lines TARDIS.4/REG.5; Installation of Time Vortex Stabilizers\n"
        "Skaro - Dalek City; Emperor's Chambers; TARDIS.9/EX.3; Reinforcement of Dalek Battle Armor\n"
        "TARDIS; Roof; Mod 15 Outrigger Retrofit for Connection Type F; Console Room - Cloister Room/TARDIS.1\n"
        "Mondas - Cybermen Outpost; Cyber-Conversion Chambers; TARDIS.5/TQ-TR; Welding of Cyberman Support Struts\n"
        "Gallifrey - Academy; Prydonian Chapter Room; Grid lines TM.3/TARDIS.9; Installation of Time Scoop Mechanism\n"
        "Gallifrey - Time War Battlefield; OS-Interface; TB.1-TB.5/TARDIS.7; Threaded Rod Welding for Time Lord Weaponry\n"
        "Karn - Sisterhood of Karn; Basement; TG-TJ/TARDIS.8 Field Work for Elixir of Life Production"
    )

    #prompt to LLM for binary classification of data (responds Yes if match. Responds No if no match)
    classifier_prompt = (f'Here are examples of different locations seperated by newlines. Is this input also a location? Respond with only "yes" or "no"\n'
                         f'"yes" examples: {location_examples}\n'
                         f'input: {line}')

    #system message helps set the behavior of gpt
    system_message = "You parse text extracted from pdfs and categorize it"

    #call openai - gpt-3 turbo model
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.2,  #lower temperature to reduce randomness in responses
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": classifier_prompt},
        ],
    )

    #Normalize the reply: drop newlines and lowercase so "Yes\n" matches too
    ai_response = response['choices'][0]['message']['content']
    ai_response = ai_response.replace('\n', '').lower()

    #Anything that does not contain "yes" is treated as "no"
    return "yes" in ai_response
def ai_analyze(text):
    """
    Use openai gpt to analyze an entire pdf text extraction and search it
    for locations.

    Empty or whitespace-only text is answered locally with the same
    "no locations found" reply the model is instructed to give, so no API
    call is made when there is nothing to search.
    Input: text string
    Return: location(s) string exactly as returned by gpt, or
            "no locations found" for empty input
    """
    # Nothing to search: return the documented sentinel without an API call
    if not text or not text.strip():
        return "no locations found"

    #load openai api_key from env variables
    openai.api_key = os.getenv("OPENAI_API_KEY")

    #examples of locations, embedded in the system message as guidance
    location_examples = (
        "Gallifrey - Citadel; Time Rotor Room; Grid lines TARDIS.4/REG.5; Installation of Time Vortex Stabilizers\n"
        "Skaro - Dalek City; Emperor's Chambers; TARDIS.9/EX.3; Reinforcement of Dalek Battle Armor\n"
        "TARDIS; Roof; Mod 15 Outrigger Retrofit for Connection Type F; Console Room - Cloister Room/TARDIS.1\n"
        "Mondas - Cybermen Outpost; Cyber-Conversion Chambers; TARDIS.5/TQ-TR; Welding of Cyberman Support Struts\n"
        "Gallifrey - Academy; Prydonian Chapter Room; Grid lines TM.3/TARDIS.9; Installation of Time Scoop Mechanism\n"
        "Gallifrey - Time War Battlefield; OS-Interface; TB.1-TB.5/TARDIS.7; Threaded Rod Welding for Time Lord Weaponry\n"
        "Karn - Sisterhood of Karn; Basement; TG-TJ/TARDIS.8 Field Work for Elixir of Life Production"
    )

    #prompt to gpt
    analyze_prompt = (
        f'Search this text for locations. Return locations if found.\n'
        f'text: {text}'
    )

    #system message helps set the behavior of gpt
    #NOTE: the fragments are concatenated, so each one needs its own trailing
    #space; the third fragment previously ran into the fourth ("response.If").
    system_message = (
        f'You are a master at searching a document for locations and '
        f'returning found locations to the user. List the locations, and '
        f'only the locations. Do not add context to the response. '
        f'If no locations are found reply "no locations found".\n'
        f'Here are examples of what locations look like in this context:\n'
        f'{location_examples}'
    )

    #call openai - gpt-3 turbo model
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.1,  #lower temperature to reduce randomness in response
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": analyze_prompt},
        ],
    )

    #Return GPT Locations
    return response['choices'][0]['message']['content']
def display_menu():
    """
    Run the interactive console menu for the sample inspection reports.

    Repeatedly prints the menu, reads a choice from stdin, loads the sample
    text file mapped to that choice, and prints the result of
    ai_location_search on its contents. Choosing "4" ends the loop. An
    unrecognized choice, or a sample file that cannot be read, prints a
    message and shows the menu again instead of terminating the program.
    Input: none (reads from stdin)
    Return: None
    """
    # Menu choice -> sample report file. Replace file names for testing
    # alternate input.
    report_files = {
        "1": "no_locations.txt",
        "2": "2_locations.txt",
        "3": "odd_format.txt",
    }

    while True:
        print("\nAEC_AI_Report_Location_Search:")
        print("1) Report with no locations")
        print("2) Report with 2 locations")
        print("3) Report with non-standard format")
        print("4) Exit")
        # Tolerate stray whitespace around the typed choice
        choice = input("Enter your choice (1-4): ").strip()

        if choice == "4":
            print("Exiting program...")
            break

        filename = report_files.get(choice)
        if filename is None:
            print("Invalid choice. Please enter a number between 1 and 4.")
            continue

        # Read the entire contents of the file into a string variable.
        # A missing or unreadable file is reported and the menu is shown again
        # rather than crashing the loop with a traceback.
        try:
            with open(filename, 'r') as file:
                report_txt = file.read()
        except OSError as err:
            print(f"Could not read {filename}: {err}")
            continue

        # Call ai_location_search and display results
        print("Search Results: \n" + ai_location_search(report_txt))
# Module-level call: starts the interactive menu loop as soon as this file is loaded.
display_menu()