|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +MansionNet SearchBot |
| 4 | +An IRC bot that provides private search functionality using the Hearch API |
| 5 | +with privacy protection and rate limiting. |
| 6 | +""" |
| 7 | + |
| 8 | +import socket |
| 9 | +import ssl |
| 10 | +import time |
| 11 | +import json |
| 12 | +import base64 |
| 13 | +import requests |
| 14 | +from datetime import datetime, timedelta |
| 15 | +from collections import defaultdict, deque |
| 16 | +from typing import Dict, List, Optional |
| 17 | + |
class RateLimiter:
    """Sliding-window limiter with a per-minute and a per-day request cap.

    Timestamps come from ``time.monotonic()`` rather than the wall clock, so
    DST changes and clock adjustments cannot leave entries stuck in (or
    prematurely expired from) the windows.

    Args:
        requests_per_minute: Maximum requests allowed in any 60-second span.
        requests_per_day: Maximum requests allowed in any 24-hour span.
    """

    _MINUTE_SECONDS = 60.0
    _DAY_SECONDS = 24 * 60 * 60.0

    def __init__(self, requests_per_minute: int, requests_per_day: int):
        self.requests_per_minute = requests_per_minute
        self.requests_per_day = requests_per_day
        # Each deque holds monotonic timestamps (floats), oldest first.
        self.minute_window = deque()
        self.day_window = deque()

    def _prune(self, now: float) -> None:
        """Drop timestamps that have aged out of their window."""
        minute_cutoff = now - self._MINUTE_SECONDS
        while self.minute_window and self.minute_window[0] < minute_cutoff:
            self.minute_window.popleft()
        day_cutoff = now - self._DAY_SECONDS
        while self.day_window and self.day_window[0] < day_cutoff:
            self.day_window.popleft()

    def can_make_request(self) -> bool:
        """Return True when both the minute and the day budget have room."""
        self._prune(time.monotonic())
        return (len(self.minute_window) < self.requests_per_minute and
                len(self.day_window) < self.requests_per_day)

    def add_request(self):
        """Record that a request was made just now."""
        now = time.monotonic()
        # Prune here as well so the deques stay bounded even if a caller
        # records requests without ever calling can_make_request().
        self._prune(now)
        self.minute_window.append(now)
        self.day_window.append(now)
| 39 | + |
class SearchBot:
    """IRC bot that answers ``!search`` commands sent by private message.

    Queries are forwarded to the Hearch metasearch API and the top results
    are sent back to the requesting user as private messages. Requests are
    throttled by a single bot-wide ``RateLimiter``.

    Args:
        server: IRC server host name.
        port: TLS port of the IRC server.
        nickname: Nick (and user name) the bot registers with.
        channels: Channels to join after registration; defaults to
            ``["#test_room"]``.
        verify_tls: When True the server certificate and host name are
            verified. Defaults to False, which keeps the historical
            behaviour of accepting any certificate.
    """

    # Budget for the text of one PRIVMSG, in UTF-8 bytes. IRC lines are
    # capped at 512 bytes including prefix and CR-LF, so 400 leaves a margin.
    MAX_MESSAGE_LENGTH = 400
    CONNECT_TIMEOUT = 30    # seconds, applies to connect + registration
    RECONNECT_DELAY = 30    # seconds to wait before reconnecting
    SEARCH_URL = 'https://api.hearch.co/search/web'

    # CR/LF would terminate the IRC line early and let untrusted text
    # (e.g. a search-result title) inject extra commands; NUL is invalid.
    _UNSAFE_CHARS = str.maketrans({"\r": " ", "\n": " ", "\x00": None})

    # Per-engine ranking multipliers; insertion order is the order in which
    # engines are serialised into the API config.
    _ENGINE_RANK_MUL = {
        "bing": 1.5,
        "brave": 1,
        "duckduckgo": 1.25,
        "etools": 1,
        "google": 1.5,
        "mojeek": 1,
        "presearch": 1.1,
        "qwant": 1.1,
        "startpage": 1.25,
        "swisscows": 1,
        "yahoo": 1.1,
    }
    _REQUIRED_BY_ORIGIN = frozenset({"bing", "google"})
    _PREFERRED = frozenset({"brave", "mojeek"})

    def __init__(self, *, server: str = "irc.example.com", port: int = 6697,
                 nickname: str = "SearchBot", channels: Optional[List[str]] = None,
                 verify_tls: bool = False):
        # IRC Configuration
        self.server = server
        self.port = port  # SSL port
        self.nickname = nickname
        self.channels = list(channels) if channels is not None else ["#test_room"]

        # Rate Limiting - more conservative than MistralBot
        self.rate_limiter = RateLimiter(
            requests_per_minute=5,
            requests_per_day=500
        )

        # Store ongoing private searches
        self.active_searches = {}

        # Connection state: the TLS socket and bytes of a not-yet-complete line.
        self.irc = None
        self._inbuf = b""

        # SSL Configuration
        self.ssl_context = ssl.create_default_context()
        if not verify_tls:
            # SECURITY NOTE: certificate and host-name checks are disabled,
            # so the connection is encrypted but NOT authenticated (open to
            # man-in-the-middle). Pass verify_tls=True when the server has a
            # valid certificate.
            self.ssl_context.check_hostname = False
            self.ssl_context.verify_mode = ssl.CERT_NONE

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _split_message(message: str, max_bytes: int) -> List[str]:
        """Split *message* into chunks of at most *max_bytes* UTF-8 bytes.

        Chunks break at the last space inside the budget when there is one,
        otherwise at the budget itself; multi-byte characters are never cut.
        An empty message yields an empty list.
        """
        chunks = []
        remaining = message
        while remaining:
            encoded = remaining.encode("UTF-8")
            if len(encoded) <= max_bytes:
                chunks.append(remaining)
                break

            # Number of whole characters that fit into the byte budget.
            fitting = encoded[:max_bytes].decode("UTF-8", errors="ignore")
            cut = max(len(fitting), 1)

            # Prefer a word boundary; index 0 would produce an empty chunk.
            space = remaining.rfind(" ", 0, cut)
            if space > 0:
                cut = space

            chunks.append(remaining[:cut])
            remaining = remaining[cut:].lstrip()
        return chunks

    @staticmethod
    def _parse_privmsg(line: str) -> Optional[tuple]:
        """Parse a raw IRC line as a PRIVMSG.

        Returns:
            ``(sender_nick, target, text)`` or ``None`` when the line is not
            a well-formed ``:prefix PRIVMSG target [:]text`` message.
        """
        if not line.startswith(":"):
            return None
        prefix, _, rest = line[1:].partition(" ")
        command, _, params = rest.lstrip().partition(" ")
        if command != "PRIVMSG":
            return None
        target, _, text = params.lstrip().partition(" ")
        if not target or target.startswith(":"):
            return None
        text = text.lstrip()
        if text.startswith(":"):
            text = text[1:]
        sender = prefix.split("!", 1)[0]
        return sender, target, text.strip()

    @staticmethod
    def _clean_field(value, default: str) -> str:
        """Collapse all whitespace in an API field; fall back to *default*."""
        if value is None:
            return default
        return " ".join(str(value).split()) or default

    @classmethod
    def _build_search_config(cls) -> dict:
        """Build the engine/ranking/timing config the Hearch API expects."""
        return {
            "engines": {
                name: {
                    "enabled": True,
                    "required": False,
                    "requiredbyorigin": name in cls._REQUIRED_BY_ORIGIN,
                    "preferred": name in cls._PREFERRED,
                    "preferredbyorigin": False,
                }
                for name in cls._ENGINE_RANK_MUL
            },
            "ranking": {
                "rankexp": 0.5,
                "rankmul": 1,
                "rankconst": 0,
                "rankscoremul": 1,
                "rankscoreadd": 0,
                "timesreturnedmul": 1,
                "timesreturnedadd": 0,
                "timesreturnedscoremul": 1,
                "timesreturnedscoreadd": 0,
                "engines": {
                    name: {"mul": mul, "add": 0}
                    for name, mul in cls._ENGINE_RANK_MUL.items()
                },
            },
            "timings": {
                "preferredtimeout": "500",
                "hardtimeout": "1500"
            },
        }

    # ------------------------------------------------------------------
    # Connection handling
    # ------------------------------------------------------------------

    def _close(self):
        """Close the current socket (if any) and reset the receive buffer."""
        if self.irc is not None:
            try:
                self.irc.close()
            except OSError:
                pass  # the socket is being discarded anyway
        self.irc = None
        self._inbuf = b""

    def _read_lines(self) -> List[str]:
        """Receive once and return every complete line now available.

        Bytes of a trailing partial line stay buffered for the next call.
        Lines are decoded individually with replacement characters, so one
        malformed byte sequence cannot discard other buffered input.

        Raises:
            ConnectionError: the server closed the connection.
            OSError: any socket/TLS failure from ``recv``.
        """
        chunk = self.irc.recv(2048)
        if not chunk:
            raise ConnectionError("connection closed by server")
        self._inbuf += chunk
        *complete, self._inbuf = self._inbuf.split(b"\r\n")
        return [raw.decode("UTF-8", errors="replace") for raw in complete]

    def _answer_ping(self, line: str):
        """Reply to a server PING, echoing its token when one is present."""
        parts = line.split()
        self.send(f"PONG {parts[1]}" if len(parts) > 1 else "PONG")

    def connect(self) -> bool:
        """Establish connection to the IRC server

        Opens the TLS connection, registers nick/user and waits for the
        welcome numeric (001) before joining the configured channels.

        Returns:
            True once registered and joined; False on any connection error,
            if the server sends ERROR, or if it closes the connection.
        """
        self._close()
        try:
            print(f"Connecting to {self.server}:{self.port}...")
            raw_sock = socket.create_connection(
                (self.server, self.port), timeout=self.CONNECT_TIMEOUT
            )
            try:
                self.irc = self.ssl_context.wrap_socket(
                    raw_sock, server_hostname=self.server
                )
            except Exception:
                raw_sock.close()  # do not leak the plain socket
                raise

            self.send(f"NICK {self.nickname}")
            self.send(f"USER {self.nickname} 0 * :MansionNet Search Bot")

            while True:
                welcomed = False
                for line in self._read_lines():
                    fields = line.split()
                    if line.startswith("PING"):
                        self._answer_ping(line)
                    elif line.startswith("ERROR"):
                        print(f"Server refused connection: {line}")
                        self._close()
                        return False
                    elif len(fields) > 1 and fields[1] == "001":  # RPL_WELCOME
                        welcomed = True

                if welcomed:
                    for channel in self.channels:
                        self.send(f"JOIN {channel}")
                        time.sleep(1)
                    # Back to blocking reads for the main loop.
                    self.irc.settimeout(None)
                    return True

        except (OSError, ValueError) as e:
            print(f"Connection error: {str(e)}")
            self._close()
            return False

    def send(self, message: str):
        """Send a raw message to the IRC server

        CR, LF and NUL are stripped from *message* so that text from
        untrusted sources cannot smuggle additional IRC commands. Delivery
        is best-effort: socket errors are logged, not raised.
        """
        clean = message.translate(self._UNSAFE_CHARS)
        if self.irc is None:
            print("Error sending message: not connected")
            return
        try:
            self.irc.sendall(f"{clean}\r\n".encode("UTF-8", errors="replace"))
            print(f"Sent: {clean}")
        except OSError as e:
            print(f"Error sending message: {str(e)}")

    def send_private_message(self, target: str, message: str):
        """Send a private message to a user

        Long messages are split into several PRIVMSGs (see
        ``MAX_MESSAGE_LENGTH``), with a short pause between consecutive
        chunks to avoid flooding.
        """
        chunks = self._split_message(message, self.MAX_MESSAGE_LENGTH)
        for position, chunk in enumerate(chunks):
            if position:
                time.sleep(0.5)  # Avoid flooding
            self.send(f"PRIVMSG {target} :{chunk}")

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    def search_hearch(self, query: str) -> List[Dict]:
        """Perform a search using the Hearch API

        Returns:
            Up to five result dicts; an empty list on any HTTP, network or
            decoding failure (errors are logged, never raised).
        """
        # Base64 encode the config
        config_b64 = base64.b64encode(
            json.dumps(self._build_search_config()).encode()
        ).decode()

        params = {
            'category': config_b64,
            'pages': '1',
            'q': query,
            'start': '1'
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json',
            'Origin': 'https://hearch.co',
            'Referer': 'https://hearch.co/'
        }

        try:
            print(f"\nDebug - Making request to: {self.SEARCH_URL}")
            print(f"Debug - Query: {query}")

            response = requests.get(self.SEARCH_URL, params=params,
                                    headers=headers, timeout=10)

            print(f"Debug - Response status: {response.status_code}")

            if response.status_code != 200:
                print(f"API Error: {response.status_code} - {response.text}")
                return []

            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Search error: {str(e)}")
            return []

        results = data.get('results') if isinstance(data, dict) else None
        if not isinstance(results, list):
            print("Search error: unexpected response shape")
            return []

        print(f"Debug - Found {len(results)} results")
        return [item for item in results[:5] if isinstance(item, dict)]

    def format_search_result(self, index: int, result: dict) -> str:
        """Format a single search result with IRC colors

        Missing or null fields fall back to placeholders, and all whitespace
        (including line breaks) inside fields is collapsed to single spaces.
        """
        # IRC color codes
        COLOR = '\x03'    # Color indicator
        RESET = '\x0F'    # Reset formatting
        BLUE = '12'       # Blue for URLs
        GREEN = '03'      # Green for titles
        GRAY = '14'       # Gray for descriptions

        title = self._clean_field(result.get('title'), 'No title')
        url = self._clean_field(result.get('url'), 'No URL')
        desc = self._clean_field(result.get('description'), '')

        # Clean up description (remove duplicate URLs and unnecessary text)
        desc = ' '.join(desc.replace(url, '').split())

        # Keep lengths reasonable
        if len(title) > 100:
            title = title[:97] + "..."
        if len(url) > 100:
            url = url[:97] + "..."
        if len(desc) > 200:
            desc = desc[:197] + "..."

        result_line = (
            f"{index}. {COLOR}{GREEN}{title}{RESET} | "   # Green title
            f"{COLOR}{BLUE}{url}{RESET}"                  # Blue URL
        )

        if len(desc) > 20:  # Only add if description is meaningful
            result_line += f" | {COLOR}{GRAY}{desc}{RESET}"  # Gray description

        return result_line

    # ------------------------------------------------------------------
    # Message handling
    # ------------------------------------------------------------------

    def handle_private_message(self, sender: str, message: str):
        """Handle private messages and search commands"""
        try:
            command, _, argument = message.partition(" ")

            if command == "!search":
                query = argument.strip()
                if not query:
                    self.send_private_message(sender, "Usage: !search <query>")
                    return

                if not self.rate_limiter.can_make_request():
                    self.send_private_message(sender, "Rate limit exceeded. Please try again later.")
                    return

                # Count the request up front so failed lookups are throttled too.
                self.rate_limiter.add_request()
                results = self.search_hearch(query)

                if not results:
                    self.send_private_message(sender, "No results found.")
                    return

                # Send each result as a separate message
                for i, result in enumerate(results[:5], 1):
                    self.send_private_message(sender, self.format_search_result(i, result))
                    time.sleep(0.5)  # Small delay between messages to prevent flooding

                # Add attribution message
                GRAY = '\x0314'  # IRC color code for gray
                BLUE = '\x0312'  # IRC color code for blue
                RESET = '\x0F'   # Reset formatting
                attribution = f"{GRAY}Search results powered by {BLUE}https://hearch.co/{GRAY} - Privacy-focused metasearch{RESET}"
                time.sleep(0.5)  # Small delay before attribution
                self.send_private_message(sender, attribution)

            elif message == "!help":
                help_msg = ("SearchBot Commands: "
                            "!search <query> - Search the web privately (results sent via PM) | "
                            "!help - Show this help message")
                self.send_private_message(sender, help_msg)

        except Exception as e:
            # Per-message boundary: one bad request must not kill the bot.
            print(f"Error handling private message: {str(e)}")
            self.send_private_message(sender, "An error occurred processing your request.")

    def handle_channel_message(self, sender: str, channel: str, message: str):
        """Handle channel messages"""
        if message == "!help":
            help_msg = ("SearchBot: Use !search <query> in a private message to search privately. "
                        "Results will be sent to you directly.")
            self.send(f"PRIVMSG {channel} :{help_msg}")
        elif message.startswith("!search"):
            self.send(f"PRIVMSG {channel} :{sender}: To protect your privacy, please use search commands in a private message.")

    def _dispatch(self, line: str):
        """Route one received IRC line to the PING or PRIVMSG handlers."""
        if line.startswith("PING"):
            self._answer_ping(line)
            return

        parsed = self._parse_privmsg(line)
        if parsed is None:
            return
        sender, target, message = parsed

        # Nicks and channel names are compared case-insensitively.
        folded = target.lower()
        if folded == self.nickname.lower():
            self.handle_private_message(sender, message)
        elif folded in {channel.lower() for channel in self.channels}:
            self.handle_channel_message(sender, target, message)

    def run(self):
        """Main bot loop

        Connects, processes incoming lines until the connection fails, then
        waits ``RECONNECT_DELAY`` seconds and reconnects. Never returns.
        """
        while True:
            if self.connect():
                try:
                    while True:
                        for line in self._read_lines():
                            print(line)  # Debug output
                            self._dispatch(line)
                except Exception as e:
                    print(f"Error in main loop: {str(e)}")
                finally:
                    self._close()

            # Pause after a failed connect as well as after a dropped
            # connection so the server is never hammered with retries.
            time.sleep(self.RECONNECT_DELAY)
| 359 | + |
# Script entry point: build the bot with its default configuration and hand
# control to its (never-returning) main loop.
if __name__ == "__main__":
    SearchBot().run()
0 commit comments