-
Notifications
You must be signed in to change notification settings - Fork 9
/
coronaviruspages.py
executable file
·75 lines (63 loc) · 1.92 KB
/
coronaviruspages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
#
# This script will find all sites that have valid /coronavirus pages
#
import os
import sys
import requests
# Host name of the scan API.  The APIHOST environment variable overrides
# the default host.
apihost = os.environ.get("APIHOST", "site-scanning.app.cloud.gov")

# Use this to specify whether we want sites that were redirected to other
# sites or not:
#   -samedomain     -> True
#   -notsamedomain  -> False (takes precedence when both flags are given)
#   neither flag    -> None, meaning get both.
if "-notsamedomain" in sys.argv:
    samedomain = False
elif "-samedomain" in sys.argv:
    samedomain = True
else:
    samedomain = None

# If this is set, also include sites that redirect all pages.
allowredirectall = "-allowredirectall" in sys.argv

# Specify whether we want ssl or not.
# -nossl is used to test against http://localhost:8000/, for example.
scheme = "http" if "-nossl" in sys.argv else "https"
# Start one shared requests.Session and reuse it for every API request, to
# make it more efficient to grab pages than opening a fresh connection per
# call.
session = requests.Session()
# use this to slurp the items on the pages down
def get_pages(url, page_size=100, fetch_json=None):
    """Yield every result item from a paginated API listing.

    Requests ``url + "&page=1"`` first, reads the total item count from its
    ``"count"`` field, then requests the remaining pages in order, yielding
    each entry of every page's ``"results"`` list.

    Args:
        url: Listing URL that already carries a query string; ``&page=N``
            is appended to it for each request.
        page_size: Number of items the API returns per page.  It must
            match the page size requested in ``url``; it is only used to
            work out how many pages exist.
        fetch_json: Optional callable that takes a page URL and returns the
            decoded JSON body.  When omitted, the page is fetched with the
            module-level ``session`` and an HTTP error status raises
            instead of being parsed as a result page.

    Yields:
        The items of each page's ``"results"`` list, in page order.

    Raises:
        ValueError: If ``page_size`` is less than 1 (raised when iteration
            starts, because this is a generator).
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    def _fetch_with_session(page_url):
        response = session.get(page_url)
        response.raise_for_status()
        return response.json()

    fetch = _fetch_with_session if fetch_json is None else fetch_json

    first_page = fetch(url + "&page=1")
    yield from first_page["results"]

    # Ceiling division: a trailing partial page still counts as a page.
    # The earlier ``int(count / 100) + 1`` paired with an exclusive range()
    # end never requested the final page, silently dropping its items.
    total_items = int(first_page["count"])
    num_pages = (total_items + page_size - 1) // page_size

    for page in range(2, num_pages + 1):
        next_page = fetch(url + "&page=" + str(page))
        yield from next_page["results"]
# Query for every scan whose /coronavirus page answered with a 200.
url = "".join(
    [
        scheme,
        "://",
        apihost,
        "/api/v1/scans/pagedata/?page_size=100&data.%2Fcoronavirus.responsecode=200",
    ]
)

# Optionally narrow the query by whether the final URL stayed on the same
# domain; with no preference (None) the filter is left off entirely.
same_domain_filters = {
    True: "&data.%2Fcoronavirus.final_url_in_same_domain=true",
    False: "&data.%2Fcoronavirus.final_url_in_same_domain=false",
}
if samedomain is not None:
    url += same_domain_filters[bool(samedomain)]

# Keep a scan's domain when redirect-all sites are allowed, or when the
# bogus /redirecttest-foo-bar-baz probe did not come back as "200".
domains = [
    scan["domain"]
    for scan in get_pages(url)
    if allowredirectall is not False
    or scan["data"]["/redirecttest-foo-bar-baz"]["responsecode"] != "200"
]

print("# domains with valid /coronavirus pages")
for domain in domains:
    print(domain)