forked from hunkim/DeepLearningStars
-
Notifications
You must be signed in to change notification settings - Fork 0
/
list2md.multiprocess.py
137 lines (95 loc) · 2.99 KB
/
list2md.multiprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""
Multiprocess version of list2md.py
Examples
----------
$ python list2md.multiprocess.py
"""
import requests
import time
import config
from multiprocessing.pool import Pool
def write_md(dict_list, filepath="README.md"):
    """Given a list of dict, write a markdown file

    Parameters
    ----------
    dict_list : list
        Each element is a dictionary of
        {"name": "Tensorflow",
         "url": "https://github.com/tensorflow/tensorflow",
         "stars": 55359,
         "description": "Computation using data flow graph ..."}
    filepath : str
        Readme path

    Returns
    ----------
    bool
        Returns True if everything went smoothly
    """
    HEAD = """# Top Deep Learning Projects
A list of popular github projects related to deep learning (ranked by stars automatically).
Please update list.txt (via pull requests)
|Project Name| Stars | Description |
| ---------- |:-----:| ----------- |
"""
    TAIL = f"""
Last Automatic Update: {time.strftime("%c")}
Inspired by https://github.com/aymericdamien/TopDeepLearning
"""
    # Sort descending by star count so the most popular repos come first.
    dict_list = sorted(dict_list, key=lambda repo: repo['stars'], reverse=True)
    # One markdown table row per repo (see `dict2md`).
    rows = [dict2md(repo) for repo in dict_list]
    # Explicit encoding: repo descriptions routinely contain non-ASCII
    # characters and the platform default encoding is not guaranteed
    # to be UTF-8 (e.g. cp1252 on Windows would crash here).
    with open(filepath, 'w', encoding='utf-8') as out:
        out.write(HEAD)
        out.write("\n".join(rows))
        out.write(TAIL)
    return True
def dict2md(dict_):
    """Render a single repo dict as one markdown table row."""
    return (f"| [{dict_['name']}]({dict_['url']}) "
            f"| {dict_['stars']} | {dict_['description']} |")
def get_url_list(filepath="list.txt"):
    """Read list.txt and return a list of GitHub API urls.

    Parameters
    ----------
    filepath : str
        Path to a text file with one repo URL per line, e.g.
        ``https://github.com/tensorflow/tensorflow``.

    Returns
    ----------
    list of str
        ``https://api.github.com/repos/owner/repo`` for each non-blank line.
    """
    # The original code hard-coded a 19-character slice; 19 is exactly
    # len("https://github.com/"), made explicit here.
    PREFIX = "https://github.com/"

    def preprocess_url(url):
        """Convert a repo web URL to its REST API equivalent."""
        # Drop the web prefix and any trailing slash.
        return "https://api.github.com/repos/" + url[len(PREFIX):].strip("/")

    with open(filepath, 'r', encoding='utf-8') as f:
        # Skip blank lines so a trailing newline in list.txt does not
        # yield a bogus "https://api.github.com/repos/" entry.
        return [preprocess_url(line.strip()) for line in f if line.strip()]
def grab_data(url):
    """Go to the URL and grab repo metadata from the GitHub API.

    Parameters
    ----------
    url : str
        API URL of a github repo (https://api.github.com/repos/owner/repo)

    Returns
    ----------
    dict
        dict_keys(['name',
                   'description',
                   'forks',
                   'created',
                   'updated',
                   'url',
                   'stars'])

    Raises
    ----------
    Exception
        If the response lacks the expected fields (bad credentials,
        rate limiting, or a non-existent repo).
    """
    # GitHub removed support for the `access_token` query parameter;
    # credentials must now be sent in the Authorization header.
    headers = {"Authorization": f"token {config.ACCESS_TOKEN}"}
    # Fetch outside the try so `data_dict` is always bound when the
    # except clause formats it.
    data_dict = requests.get(url, headers=headers).json()
    try:
        return {'name': data_dict['name'],
                'description': data_dict['description'],
                'forks': data_dict['forks_count'],
                'created': data_dict['created_at'],
                'updated': data_dict['updated_at'],
                'url': data_dict['html_url'],
                'stars': data_dict['stargazers_count']}
    except KeyError as err:
        # The payload is usually an API error message; surface it and
        # keep the original traceback chained for debugging.
        raise Exception(f"{data_dict}") from err
def main():
    """Read repo URLs, fetch metadata in parallel, and write README.md."""
    url_list = get_url_list()
    # Context manager terminates and joins the worker processes even if
    # a worker raises (the original never closed/joined the pool).
    with Pool(processes=config.N_WORKERS) as pool:
        results = pool.map(grab_data, url_list)
    write_md(results)


if __name__ == '__main__':
    main()