Skip to content

Commit

Permalink
Merge pull request #2 from gnh1201/serp
Browse files Browse the repository at this point in the history
change SERP API server
  • Loading branch information
AkiaCode authored Oct 19, 2024
2 parents 0adfee5 + 95a7530 commit e7ff4fc
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 16 deletions.
43 changes: 37 additions & 6 deletions alwaysonline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# Created at: 2024-07-31
# Updated at: 2024-10-19
#
import re
import socket
import ssl
import requests
Expand All @@ -24,6 +25,7 @@
client_encoding = config("CLIENT_ENCODING")
es_host = config("ES_HOST")
es_index = config("ES_INDEX")
librey_url = config("LIBREY_URL", default="https://serp.catswords.net")
except Exception as e:
logger.error("[*] Invalid configuration", exc_info=e)

Expand Down Expand Up @@ -111,6 +113,30 @@ def fetch_origin_server(url: str):
return 502, str(e).encode(client_encoding)


def query_to_serp(url: str):
    """Query the SERP API server with search keywords derived from *url*.

    The leading ``http://``/``https://`` and every character that is neither
    a word character nor whitespace are replaced with spaces, and the
    resulting whitespace runs are collapsed, so the URL becomes a plain
    keyword query.

    Args:
        url: The target URL to turn into a search query.

    Returns:
        A ``(status_code, body)`` tuple. On success this is ``200`` and the
        raw response body. If the server answers with another status, that
        status is returned with an explanatory message encoded using
        ``client_encoding``. Any exception (including a timeout) yields
        ``502`` with the encoded error message.
    """
    try:
        # Replace the scheme prefix and special characters with spaces, then
        # collapse the resulting whitespace runs into single separators.
        q = " ".join(re.sub(r"^https?://|[^\w\s]", " ", url).split())

        # Let requests build and escape the query string, and bound the wait
        # so an unresponsive SERP server cannot stall the proxy connection.
        response = requests.get(
            "%s/api.php" % librey_url, params={"q": q}, timeout=10
        )
        if response.status_code != 200:
            message = f"SERP API server returned status code {response.status_code}"
            return response.status_code, message.encode(client_encoding)

        return 200, response.content
    except Exception as e:
        return 502, f"Error querying SERP API: {str(e)}".encode(client_encoding)


def query_to_llm(text: str):
    """Placeholder for querying an LLM with *text*; not implemented yet.

    Args:
        text: The text to send; currently unused.

    Returns:
        A ``(status_code, body)`` tuple. Until the query is implemented this
        is always ``(502, b"")``. The body is ``bytes`` so it matches the
        other fetch/query helpers, whose bodies are appended to a bytes
        buffer by the caller.
    """
    try:
        # TODO: implement the LLM query
        return 502, b""
    except Exception as e:
        return 502, str(e).encode(client_encoding)


class AlwaysOnline(Extension):
def __init__(self):
self.type = "connector" # this is a connector
Expand All @@ -123,7 +149,6 @@ def connect(self, conn: socket.socket, data: bytes, webserver: bytes, port: byte
connected = False

is_ssl = scheme in [b"https", b"tls", b"ssl"]
cache_hit = 0
buffered = b""

def sendall(_sock: socket.socket, _conn: socket.socket, _data: bytes):
Expand Down Expand Up @@ -156,21 +181,27 @@ def sendall(_sock: socket.socket, _conn: socket.socket, _data: bytes):
status_code, content = fetch_cache_from_elasticsearch(target_url)
if status_code == 200:
buffered += content
cache_hit += 1
connected = True

if not connected:
logger.info("Trying get data from Wayback Machine...")
status_code, content = fetch_cache_from_internet_archive(target_url)
if status_code == 200:
buffered += content
cache_hit += 1
connected = True

if cache_hit == 0:
if not connected:
status_code, content = fetch_origin_server(target_url)
buffered += content
push_cache_to_elasticsearch(target_url, buffered)
if status_code == 200:
buffered += content
push_cache_to_elasticsearch(target_url, buffered)
connected = True

if not connected:
status_code, content = query_to_serp(target_url)
if status_code == 200:
buffered += content
connected = True

conn.send(buffered)
else:
Expand Down
21 changes: 11 additions & 10 deletions fediverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
# Caterpillar Proxy - The simple and parasitic web proxy with SPAM filter (formerly, php-httpproxy)
# Namyheon Go (Catswords Research) <abuse@catswords.net>
# https://github.com/gnh1201/caterpillar
# https://github.com/gnh1201/caterpillar/wiki/Fediverse
# https://catswords-oss.rdbl.io/1155378128/3821602484
#
# Created in: 2022-10-06
# Updated in: 2024-10-08
# Updated in: 2024-10-19
#
import base64
import hashlib
Expand All @@ -26,16 +26,17 @@

logger = Logger(name="fediverse", level=logging.WARNING)

# Read this: https://catswords-oss.rdbl.io/1155378128/3821602484
try:
client_encoding = config("CLIENT_ENCODING", default="utf-8")
truecaptcha_userid = config("TRUECAPTCHA_USERID") # truecaptcha.org
truecaptcha_apikey = config("TRUECAPTCHA_APIKEY") # truecaptcha.org
truecaptcha_userid = config("TRUECAPTCHA_USERID")
truecaptcha_apikey = config("TRUECAPTCHA_APIKEY")
dictionary_file = config(
"DICTIONARY_FILE", default="words_alpha.txt"
) # https://github.com/dwyl/english-words
librey_apiurl = config(
"LIBREY_APIURL", default="https://serp.catswords.net"
) # https://github.com/Ahwxorg/librey
)
librey_url = config(
"LIBREY_URL", default="https://serp.catswords.net"
)
bad_domain = config("BAD_DOMAIN", default="")
except Exception as e:
logger.error("[*] Invalid configuration", exc_info=e)
Expand Down Expand Up @@ -115,7 +116,7 @@ def vowel_ratio_test(s):
strategies.append("KnownWords4")

# check ID with SearchEngine3 strategy
if librey_apiurl != "" and all(map(self.search_engine_test, matches)):
if librey_url != "" and all(map(self.search_engine_test, matches)):
score += 1
strategies.append("SearchEngine3")

Expand Down Expand Up @@ -298,7 +299,7 @@ def is_known_word(s):

# Strategy: SearchEngine3
def search_engine_test(self, s):
url = "%s/api.php?q=%s" % (librey_apiurl, s)
url = "%s/api.php?q=%s" % (librey_url, s)
response = requests.get(url, verify=False)
if response.status_code != 200:
return False
Expand Down

0 comments on commit e7ff4fc

Please sign in to comment.