-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathharvest.py
84 lines (71 loc) · 2.34 KB
/
harvest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import datetime
import re
import sys
import requests
import json
# Base search query; paging parameters are appended to it by the harvest loop.
QUERY_URL = "https://api.github.com/search/repositories?q=neovim"

# The API token is read from a file named "secretkey" in the current working
# directory; surrounding whitespace (such as a trailing newline) is removed.
with open("secretkey", mode="r") as reader:
    SECRET_KEY = reader.read().strip()

# Headers sent with every request made through `requests.get` in this script.
HEADERS = {
    "Accept": "application/vnd.github+json",
    "Authorization": f"Bearer {SECRET_KEY}",
    "X-GitHub-Api-Version": "2022-11-28",
}
def get_total(timeout=30):
    """Return the "total_count" field reported for QUERY_URL.

    Args:
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (instead of failing later with an opaque KeyError).
        KeyError: If the decoded payload has no "total_count" entry.
    """
    response = requests.get(QUERY_URL, headers=HEADERS, timeout=timeout)
    # Fail loudly on an error status rather than on a missing key below.
    response.raise_for_status()
    return response.json()["total_count"]
def get_page(url, timeout=30):
    """Fetch *url* and return the "items" entry of its JSON payload.

    Args:
        url: Fully built request URL, including any paging parameters.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The value stored under "items", or an empty dict when the payload
        is not a JSON object or has no "items" entry. The empty dict is the
        sentinel the harvest loop compares against to stop paging, so the
        HTTP status is deliberately not checked here.
    """
    results = requests.get(url, headers=HEADERS, timeout=timeout).json()
    # Explicit shape check instead of a blanket `except Exception`.
    if isinstance(results, dict) and "items" in results:
        return results["items"]
    return {}
def _sort_key(repo, field):
    """Return the value used to rank *repo* by *field*.

    Fields whose name ends in "_count" are compared numerically; a count
    stored as "" (the harvest loop replaces falsy values, including 0,
    with "") ranks as 0. Any other field is compared by its string form.
    """
    value = repo[field]
    if field.endswith("_count"):
        return value or 0
    return str(value)


result = []

# Number of 100-item pages needed for the reported total (ceiling division).
total = get_total()
rounded = -(-total // 100)

# GitHub page numbers are 1-based; starting at 0 either duplicated the first
# page or aborted the harvest, and never requested the last computed page.
for i in range(1, rounded + 1):
    url = f"{QUERY_URL}&page={i}&per_page=100"
    page_result = get_page(url)
    # get_page returns {} when the payload carries no "items"; stop paging.
    if page_result == {}:
        break
    for item in page_result:
        # Skip repositories that are private, archived or disabled.
        if item["private"] or item["archived"] or item["disabled"]:
            continue
        repo = {"full_name": item["full_name"]}
        # "watchers_count" is unreliable...
        for key in ["name", "html_url", "description", "updated_at",
                    "forks_count", "language", "stargazers_count",
                    "created_at"]:
            # Falsy values (None, 0, "") are stored as an empty string.
            repo[key] = item[key] or ""
        if item["license"]:
            repo["license_name"] = item["license"]["name"]
        result.append(repo)
    # Progress output: one line per harvested page.
    print(url, file=sys.stderr)

with open("data.json", "w") as writer:
    writer.write(json.dumps(result, indent=4))

for count_type in ["stargazers_count", "forks_count", "created_at"]:
    # Counts are ranked numerically (comparing str(count) put "999" ahead of
    # "12000"); "created_at" is still ranked by its string value.
    result = sorted(result, key=lambda repo: _sort_key(repo, count_type),
                    reverse=True)
    # Record each repository's rank for this ordering on the record itself;
    # ranks written in earlier iterations stay on the record.
    for i, r in enumerate(result):
        r[count_type + "_sort"] = i
    # "stargazers_count.json" -> "stargazers.json", etc.
    p = f"{count_type}.json".replace("_count", "")
    # NOTE(review): this slice keeps 99 entries; if a top 100 was intended
    # it should be result[0:100] -- confirm before changing the output size.
    with open(p, "w") as writer:
        writer.write(json.dumps(result[0:99], indent=4))

# Stamp today's date (YYYY-MM-DD) into the update marker of index.html.
with open("index.html", "r") as reader:
    index = reader.read()
regex = re.compile(r'<span> :: Update.*</span>')
date = datetime.date.today().isoformat()
index = regex.sub(f"<span> :: Update {date}</span>", index)
with open("index.html", "w") as writer:
    writer.write(index)