basic.py
# pylint: disable=invalid-name
import re
import concurrent.futures

from searxstats.model import SearxStatisticsResult
from searxstats.common.foreach import for_each
from searxstats.common.utils import dict_merge
from searxstats.common.http import new_client, get, get_host, get_network_type, NetworkType
from searxstats.common.ssl_info import get_ssl_info
from searxstats.common.memoize import MemoizeToDisk
from searxstats.common.response_time import ResponseTimeStats
from searxstats.config import DEFAULT_HEADERS


# in an HTML page produced by searx, regex to find the searx version
SEARX_VERSION_RE = r'<meta name=[\"]?generator[\"]? content="searx/([^\"]+)">'


async def get_searx_version(response):
    # extract the searx version advertised in the page's generator meta tag
    results = re.findall(SEARX_VERSION_RE, response.text)
    if len(results) > 0 and len(results[0]) > 0:
        return results[0]
    return None
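
# Hypothetical example: for a page containing
#   <meta name="generator" content="searx/1.0.0">
# get_searx_version() returns '1.0.0'; if no generator tag is found,
# it returns None.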


@MemoizeToDisk(expire_time=3600)
async def fetch_one(instance_url: str, private: bool) -> tuple:
    # no cookie ( cookies=DEFAULT_COOKIES, )
    network_type = get_network_type(instance_url)
    detail = {
        'network_type': network_type.name.lower(),
        'http': {},
        'version': None,
    }
    try:
        async with new_client(network_type=network_type) as session:
            # get() returns a (response, error) pair: error is a string
            # describing the failure, or None on success
            response, error = await get(session, instance_url,
                                        headers=DEFAULT_HEADERS, timeout=10)
            status_code = response.status_code if response is not None else None
            detail['http'] = {
                'status_code': status_code,
                'error': error,
            }
            if response is not None:
                detail['version'] = await get_searx_version(response)
                detail['timing'] = {}
                response_time_stats = ResponseTimeStats()
                response_time_stats.add_response(response)
                detail['timing']['initial'] = response_time_stats.get()
                response_url = str(response.url)
                # add trailing slash
                if not response_url.endswith('/'):
                    response_url = response_url + '/'
                # redirect
                if 'alternativeUrls' not in detail:
                    detail['alternativeUrls'] = dict()
                if response_url != instance_url:
                    detail['alternativeUrls'][instance_url] = 'redirect from'
                    instance_url = response_url
    except concurrent.futures.TimeoutError:
        # this exception occurs on new_client()
        error = 'Timeout error'

    if (detail['version'] is not None or private) and network_type == NetworkType.NORMAL:
        detail['tls'] = get_ssl_info(get_host(instance_url))

    if error is not None:
        detail['http']['error'] = error
        detail['error'] = error

    return instance_url, detail
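
# Illustrative return value (all values hypothetical):
#   ('https://searx.example.com/',
#    {'network_type': 'normal',
#     'http': {'status_code': 200, 'error': None},
#     'version': '1.0.0',
#     'timing': {'initial': ...},   # from ResponseTimeStats.get()
#     'alternativeUrls': {},        # redirect sources, when any were followed
#     'tls': ...})                  # only when a version was found (or the
#                                   # instance is private) on a NORMAL network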


async def fetch_one_display(url: str, private: bool) -> tuple:
    # basic checks
    url, detail = await fetch_one(url, private)

    # output
    error = detail['http']['error'] or ''
    http_status_code = detail['http'].get('status_code', '') or ''
    searx_version = detail.get('version', '') or ''
    timing = detail.get('timing', {}).get('initial', {}).get('all', {}).get('value', None)
    cert_orgname = detail.get('tls', {}).get('certificate', {}).get('issuer', {}).get('organizationName', '')
    if error != '':
        icon = '❌'
    elif searx_version == '':
        icon = '👽'
    else:
        icon = '🍰'
    if timing:
        timing = '{:.3f}'.format(timing)
    else:
        timing = ' '
    # one line per instance:
    # status code, icon, version, response time, URL, certificate issuer, error
    print('{0:3} {1} {2:20} {3} {4:60} {5:30} {6:50}'.
          format(http_status_code, icon, searx_version, timing, url, cert_orgname, error))
    return url, detail
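
# Hypothetical example of one printed status line:
#   200 🍰 1.0.0                0.254 https://searx.example.com/ ...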


async def fetch(searx_stats_result: SearxStatisticsResult):
    url_to_deleted = []

    async def fetch_and_set_async(url: str, detail, *_, **__):
        if 'version' not in detail:
            r_url, r_detail = await fetch_one_display(url, searx_stats_result.private)
            dict_merge(r_detail, detail)
            if r_url != url:
                # it is safe to delete url later: another instance's r_url
                # will never be url (the variable), since each r_url is the
                # result of following HTTP redirects
                url_to_deleted.append(url)
            searx_stats_result.update_instance(r_url, r_detail)

    instance_iterator = searx_stats_result.iter_instances(only_valid=False, valid_or_private=False)
    await for_each(instance_iterator, fetch_and_set_async, limit=1)

    # drop the URLs that were replaced by their redirect targets
    for url in url_to_deleted:
        del searx_stats_result.instances[url]
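

# Minimal usage sketch (assumptions: SearxStatisticsResult can be built
# with no arguments and asyncio drives the coroutine; neither is
# guaranteed by this module alone):
#
#   import asyncio
#   from searxstats.model import SearxStatisticsResult
#
#   searx_stats_result = SearxStatisticsResult()
#   asyncio.run(fetch(searx_stats_result))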