Skip to content

Commit b197a55

Browse files
authored
Create searchbot.py
1 parent 13e1865 commit b197a55

File tree

1 file changed

+362
-0
lines changed

1 file changed

+362
-0
lines changed

searchbot.py

Lines changed: 362 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,362 @@
1+
#!/usr/bin/env python3
2+
"""
3+
MansionNet SearchBot
4+
An IRC bot that provides private search functionality using the Hearch API
5+
with privacy protection and rate limiting.
6+
"""
7+
8+
import socket
9+
import ssl
10+
import time
11+
import json
12+
import base64
13+
import requests
14+
from datetime import datetime, timedelta
15+
from collections import defaultdict, deque
16+
from typing import Dict, List, Optional
17+
18+
class RateLimiter:
    """Sliding-window limiter enforcing a per-minute and a per-day budget.

    Each recorded request is timestamped with a monotonic clock, so the
    windows keep working across DST changes and wall-clock adjustments
    (a naive local ``datetime.now()`` can jump backwards, which would make
    old entries look like they lie in the future and stall the limiter).

    Args:
        requests_per_minute: Maximum requests allowed in any 60-second span.
        requests_per_day: Maximum requests allowed in any 24-hour span.
        clock: Zero-argument callable returning seconds as a float. Defaults
            to ``time.monotonic``; injectable so the limiter can be driven
            deterministically.
    """

    MINUTE_SECONDS = 60.0
    DAY_SECONDS = 86400.0

    def __init__(self, requests_per_minute: int, requests_per_day: int,
                 clock=time.monotonic):
        self.requests_per_minute = requests_per_minute
        self.requests_per_day = requests_per_day
        self._clock = clock
        # Timestamps (clock seconds) of recorded requests, oldest first.
        self.minute_window = deque()
        self.day_window = deque()

    def _expire(self, now: float) -> None:
        """Drop timestamps that have fallen out of their window."""
        for window, span in ((self.minute_window, self.MINUTE_SECONDS),
                             (self.day_window, self.DAY_SECONDS)):
            cutoff = now - span
            while window and window[0] < cutoff:
                window.popleft()

    def can_make_request(self) -> bool:
        """Return True when both the minute and the day budget have room."""
        self._expire(self._clock())
        return (len(self.minute_window) < self.requests_per_minute and
                len(self.day_window) < self.requests_per_day)

    def add_request(self):
        """Record one request against both windows."""
        now = self._clock()
        # Prune here as well so the deques stay bounded even if callers
        # record requests without ever asking can_make_request().
        self._expire(now)
        self.minute_window.append(now)
        self.day_window.append(now)
39+
40+
class SearchBot:
    """IRC bot that answers ``!search`` requests sent to it by private message.

    The bot connects over TLS, joins its channels, and dispatches incoming
    PRIVMSG lines. Searches are forwarded to the Hearch API and the top
    results are sent back to the requesting user privately.

    Args:
        server: IRC server host name.
        port: TLS port of the IRC server.
        nickname: Nick (and user name) the bot registers with.
        channels: Channels to join after registration; defaults to
            ``["#test_room"]``.
    """

    # CR/LF are replaced by spaces and NUL is dropped before a line is sent,
    # so text taken from search results cannot terminate the line early and
    # smuggle in an extra IRC command.
    _LINE_SANITIZER = str.maketrans("\r\n", "  ", "\x00")
    # Payload budget per PRIVMSG, in encoded bytes, leaving headroom under
    # the protocol's line limit for the command and the relayed prefix.
    MAX_MESSAGE_BYTES = 400
    # Seconds to wait before reconnecting after a failure or disconnect.
    RECONNECT_DELAY = 30

    def __init__(self, server: str = "irc.example.com", port: int = 6697,
                 nickname: str = "SearchBot",
                 channels: Optional[List[str]] = None):
        # IRC Configuration
        self.server = server
        self.port = port  # SSL port
        self.nickname = nickname
        self.channels = list(channels) if channels is not None else ["#test_room"]

        # Rate Limiting - more conservative than MistralBot
        self.rate_limiter = RateLimiter(
            requests_per_minute=5,
            requests_per_day=500
        )

        # Store ongoing private searches
        self.active_searches = {}

        # Connection state: the TLS socket, undecoded trailing bytes of a
        # partial line, and complete lines not yet handed to a caller.
        self.irc = None
        self._recv_buffer = b""
        self._pending_lines = deque()

        # SSL Configuration
        # NOTE(review): certificate and host name verification are switched
        # off, so the link is encrypted but not authenticated. Left as-is to
        # keep working against servers with self-signed certificates.
        self.ssl_context = ssl.create_default_context()
        self.ssl_context.check_hostname = False
        self.ssl_context.verify_mode = ssl.CERT_NONE

    # ------------------------------------------------------------------
    # Connection handling
    # ------------------------------------------------------------------

    def _close(self):
        """Close the current socket, if any, ignoring errors while closing."""
        irc = getattr(self, "irc", None)
        if irc is None:
            return
        try:
            irc.close()
        except OSError:
            pass
        self.irc = None

    def _next_line(self) -> str:
        """Return the next complete line received from the server.

        Bytes are buffered and only decoded once a full CRLF-terminated line
        is available, so a multi-byte character split across two reads is
        reassembled instead of raising a decode error.

        Raises:
            ConnectionError: The server closed the connection.
        """
        while not self._pending_lines:
            chunk = self.irc.recv(2048)
            if not chunk:
                # An empty read means the peer closed the socket; without
                # this check the caller would spin forever on empty reads.
                raise ConnectionError("Connection closed by server")
            self._recv_buffer += chunk
            *complete, self._recv_buffer = self._recv_buffer.split(b"\r\n")
            self._pending_lines.extend(
                raw.decode("utf-8", errors="replace") for raw in complete
            )
        return self._pending_lines.popleft()

    @staticmethod
    def _parse_line(line: str):
        """Split a raw IRC line into ``(prefix, command, params)``.

        ``prefix`` is the sender without the leading colon (empty when the
        line has none), ``command`` is upper-cased, and the trailing
        parameter (the part after `` :``) is the last element of ``params``.
        """
        prefix = ""
        if line.startswith(":"):
            prefix, _, line = line[1:].partition(" ")
        if " :" in line:
            head, _, trailing = line.partition(" :")
            params = head.split()
            params.append(trailing)
        else:
            params = line.split()
        if not params:
            return prefix, "", []
        command = params.pop(0).upper()
        return prefix, command, params

    def connect(self) -> bool:
        """Establish connection to the IRC server

        Registers the nick, answers PINGs sent during registration, and joins
        the configured channels once the welcome numeric (001) arrives.

        Returns:
            True once registered and joined; False on an ERROR line, a closed
            connection, or any other failure.
        """
        try:
            self._close()
            self._recv_buffer = b""
            self._pending_lines = deque()

            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.irc = self.ssl_context.wrap_socket(sock)

            print(f"Connecting to {self.server}:{self.port}...")
            self.irc.connect((self.server, self.port))

            self.send(f"NICK {self.nickname}")
            self.send(f"USER {self.nickname} 0 * :MansionNet Search Bot")

            while True:
                # Match on the parsed command rather than on substrings, so
                # "001" or "PING" appearing inside other text cannot trigger.
                _, command, params = self._parse_line(self._next_line())

                if command == "PING":
                    self.send(f"PONG :{params[-1]}" if params else "PONG")
                elif command == "001":  # RPL_WELCOME
                    for channel in self.channels:
                        self.send(f"JOIN {channel}")
                        time.sleep(1)
                    return True
                elif command == "ERROR":
                    return False

        except Exception as e:
            print(f"Connection error: {str(e)}")
            return False

    def send(self, message: str):
        """Send a raw message to the IRC server

        Embedded CR/LF/NUL characters are neutralised first so that one call
        always produces exactly one protocol line. Errors are printed, not
        raised.
        """
        try:
            safe = message.translate(self._LINE_SANITIZER)
            # sendall: a plain send() may transmit only part of the line.
            self.irc.sendall(bytes(f"{safe}\r\n", "UTF-8"))
            print(f"Sent: {safe}")
        except Exception as e:
            print(f"Error sending message: {str(e)}")

    @staticmethod
    def _split_for_irc(message: str, max_bytes: int) -> List[str]:
        """Split ``message`` into chunks of at most ``max_bytes`` UTF-8 bytes.

        Chunks break at the last space that fits when there is one, and never
        in the middle of a multi-byte character.
        """
        chunks = []
        while message:
            encoded = message.encode("utf-8")
            if len(encoded) <= max_bytes:
                chunks.append(message)
                break

            # Longest character prefix whose encoding fits in the budget.
            cut = len(encoded[:max_bytes].decode("utf-8", errors="ignore"))
            cut = max(cut, 1)  # always make progress
            space = message.rfind(" ", 0, cut)
            if space > 0:
                cut = space

            chunks.append(message[:cut])
            message = message[cut:].lstrip()
        return chunks

    def send_private_message(self, target: str, message: str):
        """Send a private message to a user

        Long messages are split into several PRIVMSG lines with a short pause
        between them to avoid flooding.
        """
        try:
            chunks = self._split_for_irc(message, self.MAX_MESSAGE_BYTES)
            for position, chunk in enumerate(chunks):
                if position:
                    time.sleep(0.5)  # Avoid flooding
                self.send(f"PRIVMSG {target} :{chunk}")

        except Exception as e:
            print(f"Error sending private message: {str(e)}")
            self.send(f"PRIVMSG {target} :Error: Message delivery failed.")

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    @staticmethod
    def _build_search_config() -> Dict:
        """Return the engine/ranking/timing configuration sent to Hearch.

        The structure and key order match the configuration the bot has
        always sent; only the repetition is factored out.
        """
        engine_weights = {
            "bing": 1.5,
            "brave": 1,
            "duckduckgo": 1.25,
            "etools": 1,
            "google": 1.5,
            "mojeek": 1,
            "presearch": 1.1,
            "qwant": 1.1,
            "startpage": 1.25,
            "swisscows": 1,
            "yahoo": 1.1,
        }
        required_by_origin = ("bing", "google")
        preferred = ("brave", "mojeek")

        return {
            "engines": {
                name: {
                    "enabled": True,
                    "required": False,
                    "requiredbyorigin": name in required_by_origin,
                    "preferred": name in preferred,
                    "preferredbyorigin": False,
                }
                for name in engine_weights
            },
            "ranking": {
                "rankexp": 0.5,
                "rankmul": 1,
                "rankconst": 0,
                "rankscoremul": 1,
                "rankscoreadd": 0,
                "timesreturnedmul": 1,
                "timesreturnedadd": 0,
                "timesreturnedscoremul": 1,
                "timesreturnedscoreadd": 0,
                "engines": {
                    name: {"mul": weight, "add": 0}
                    for name, weight in engine_weights.items()
                },
            },
            "timings": {
                "preferredtimeout": "500",
                "hardtimeout": "1500",
            },
        }

    def search_hearch(self, query: str) -> List[Dict]:
        """Perform a search using the Hearch API

        Returns:
            At most five result dicts; an empty list on any HTTP, network or
            decoding error (the error is printed).
        """
        try:
            # Base64 encode the config
            config_b64 = base64.b64encode(
                json.dumps(self._build_search_config()).encode()
            ).decode()

            # Build the URL with correct query parameters
            params = {
                'category': config_b64,
                'pages': '1',
                'q': query,
                'start': '1'
            }

            url = 'https://api.hearch.co/search/web'

            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                'Accept': 'application/json',
                'Origin': 'https://hearch.co',
                'Referer': 'https://hearch.co/'
            }

            # NOTE(review): these debug prints write the user's query to
            # stdout; reconsider before running where logs are retained.
            print(f"\nDebug - Making request to: {url}")
            print(f"Debug - Query: {query}")
            print(f"Debug - Full URL with params: {url}?{'&'.join(f'{k}={v}' for k, v in params.items())}")

            response = requests.get(url, params=params, headers=headers, timeout=10)

            print(f"Debug - Response status: {response.status_code}")
            print(f"Debug - Response headers: {dict(response.headers)}")
            print(f"Debug - Response content: {response.text[:500]}")

            if response.status_code != 200:
                print(f"API Error: {response.status_code} - {response.text}")
                return []

            # "results" may be missing or null in the payload.
            results = response.json().get('results') or []
            print(f"Debug - Found {len(results)} results")
            return results[:5]

        except Exception as e:
            print(f"Search error: {str(e)}")
            return []

    def format_search_result(self, index: int, result: dict) -> str:
        """Format a single search result with IRC colors

        Missing, empty or null fields fall back to placeholders instead of
        raising. The description is only appended when it is longer than 20
        characters after clean-up.
        """
        # IRC color codes
        COLOR = '\x03'   # Color indicator
        RESET = '\x0F'   # Reset formatting
        BLUE = '12'      # Blue for URLs
        GREEN = '03'     # Green for titles
        GRAY = '14'      # Gray for descriptions

        title = str(result.get('title') or 'No title').strip()
        url = str(result.get('url') or 'No URL').strip()
        desc = str(result.get('description') or '').strip()

        # Clean up description (remove duplicate URLs and unnecessary text)
        desc = desc.replace(url, '')
        desc = ' '.join(desc.split())  # Normalize whitespace

        # Keep lengths reasonable
        if len(title) > 100:
            title = title[:97] + "..."
        if len(url) > 100:
            url = url[:97] + "..."
        if len(desc) > 200:
            desc = desc[:197] + "..."

        result_line = (
            f"{index}. {COLOR}{GREEN}{title}{RESET} | "  # Green title
            f"{COLOR}{BLUE}{url}{RESET}"                 # Blue URL
        )

        if desc and len(desc) > 20:  # Only add if description is meaningful
            result_line += f" | {COLOR}{GRAY}{desc}{RESET}"  # Gray description

        return result_line

    # ------------------------------------------------------------------
    # Message handling
    # ------------------------------------------------------------------

    def handle_private_message(self, sender: str, message: str):
        """Handle private messages and search commands

        ``!search <query>`` runs a search and sends the results back to
        ``sender``; ``!search`` without a query replies with usage; ``!help``
        lists the commands. Anything else is ignored.
        """
        try:
            command, _, argument = message.partition(" ")

            if command == "!search":
                query = argument.strip()
                # Checked before the rate limit so that a bare "!search"
                # (the main loop strips trailing spaces) still gets a reply.
                if not query:
                    self.send_private_message(sender, "Usage: !search <query>")
                    return

                if not self.rate_limiter.can_make_request():
                    self.send_private_message(sender, "Rate limit exceeded. Please try again later.")
                    return

                # Perform search and send results privately
                results = self.search_hearch(query)
                self.rate_limiter.add_request()

                if not results:
                    self.send_private_message(sender, "No results found.")
                    return

                # Send each result as a separate message
                for i, result in enumerate(results[:5], 1):
                    self.send_private_message(sender, self.format_search_result(i, result))
                    time.sleep(0.5)  # Small delay between messages to prevent flooding

                # Add attribution message
                GRAY = '\x0314'  # IRC color code for gray
                BLUE = '\x0312'  # IRC color code for blue
                RESET = '\x0F'   # Reset formatting
                attribution = f"{GRAY}Search results powered by {BLUE}https://hearch.co/{GRAY} - Privacy-focused metasearch{RESET}"
                time.sleep(0.5)  # Small delay before attribution
                self.send_private_message(sender, attribution)

            elif message == "!help":
                help_msg = ("SearchBot Commands: "
                            "!search <query> - Search the web privately (results sent via PM) | "
                            "!help - Show this help message")
                self.send_private_message(sender, help_msg)

        except Exception as e:
            print(f"Error handling private message: {str(e)}")
            self.send_private_message(sender, "An error occurred processing your request.")

    def handle_channel_message(self, sender: str, channel: str, message: str):
        """Handle channel messages

        Searches are never run in a channel; users are pointed to private
        messages instead.
        """
        if message == "!help":
            help_msg = ("SearchBot: Use !search <query> in a private message to search privately. "
                        "Results will be sent to you directly.")
            self.send(f"PRIVMSG {channel} :{help_msg}")
        elif message.startswith("!search"):
            self.send(f"PRIVMSG {channel} :{sender}: To protect your privacy, please use search commands in a private message.")

    def _handle_line(self, line: str):
        """Dispatch one received line: answer PINGs and route PRIVMSGs."""
        prefix, command, params = self._parse_line(line)

        if command == "PING":
            self.send(f"PONG :{params[-1]}" if params else "PONG")
            return

        # Malformed PRIVMSGs (no target or no text) are skipped rather than
        # allowed to raise and force a reconnect.
        if command != "PRIVMSG" or len(params) < 2:
            return

        sender = prefix.split("!", 1)[0]
        if not sender:
            return

        target = params[0].strip()
        message = params[1].strip()

        # Nick and channel names are compared case-insensitively.
        folded = target.lower()
        if folded == self.nickname.lower():
            self.handle_private_message(sender, message)
        elif folded in (channel.lower() for channel in self.channels):
            self.handle_channel_message(sender, target, message)

    def run(self):
        """Main bot loop

        Connects, processes lines until the connection fails, then closes the
        socket, waits ``RECONNECT_DELAY`` seconds and starts over. Runs until
        interrupted.
        """
        while True:
            try:
                if self.connect():
                    while True:
                        line = self._next_line()
                        print(line)  # Debug output
                        self._handle_line(line)
            except Exception as e:
                print(f"Error in main loop: {str(e)}")
            finally:
                self._close()

            # Applies to failed connects too, so an unreachable server is
            # not retried in a tight loop.
            time.sleep(self.RECONNECT_DELAY)
359+
360+
if __name__ == "__main__":
    # Script entry point: build the bot with its default settings and hand
    # control to its connect/dispatch loop, which does not return.
    SearchBot().run()

0 commit comments

Comments
 (0)