-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_meta_data.py
96 lines (75 loc) · 2.48 KB
/
generate_meta_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import json
## Arguments
# Relative path to the notebook files. Leave it blank if they are in the
# current dir.
NOTEBOOK_DIR = ""
DIR_STATIC = "files"
MODULE_ID = 93400
TAGS = "DB"
COMMON_CODE_URL = "https://github.com/as641651/DjangoAPI-TravisCI-submodule"
AUTHOR = "Aravind Sankaran"
# Setting this to False will overwrite code_urls.txt, and any manual changes
# made to that file will be lost.
CODE_URLS_REUSE = False

# Name of the current working directory. basename() is used instead of
# splitting on '/' so the result is also correct where the path separator
# is not '/'.
MODULE_NAME = os.path.basename(os.getcwd())
# Resolve the notebook directory against the current working directory.
NOTEBOOK_DIR = os.path.join(os.getcwd(), NOTEBOOK_DIR)

# Top-level settings; per-file entries are added to this dict further down
# before it is written out as JSON.
configs = {
    "dir_path": NOTEBOOK_DIR,
    "dir_static": DIR_STATIC,
    "module_name": MODULE_NAME,
    "module_id": MODULE_ID,
    "author": AUTHOR,
    "tags": TAGS,
}
def get_list_all_files_name(dir_path):
    """Return the names of the notebook and PDF files directly in *dir_path*.

    Only regular files whose name ends with ``.ipynb`` or ``.pdf`` are
    returned. If *dir_path* contains a file named ``DoNotTrack``, every
    whitespace-separated token in it is treated as a file name to exclude.

    Args:
        dir_path: Directory to scan (sub-directories are not descended into).

    Returns:
        A list of bare file names (no directory component), in the order
        ``os.listdir`` yields them.
    """
    ignore_file = os.path.join(dir_path, "DoNotTrack")
    ignored = set()
    if os.path.exists(ignore_file):
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(ignore_file, 'r') as handle:
            ignored = set(handle.read().split())

    return [
        name
        for name in os.listdir(dir_path)
        if name.endswith(('.ipynb', '.pdf'))
        and name not in ignored
        and os.path.isfile(os.path.join(dir_path, name))
    ]
def parse_code_urls():
    """Overlay the code URLs stored in ``code_urls.txt`` onto ``configs``.

    The file is read as consecutive three-line records: a file name, the
    code URL for that file (which may be empty), and one further line that
    is read and discarded. Reading stops at the first record whose name line
    is empty after stripping, which includes end of file.

    Raises:
        KeyError: If a file name from ``code_urls.txt`` has no entry in
            ``configs``.
    """
    with open('code_urls.txt', 'r') as cache:
        while True:
            file_name = cache.readline().strip()
            # Test the value, not object identity: `is ""` only works when
            # the interpreter happens to reuse one empty-string object.
            if not file_name:
                return
            code_url = cache.readline().strip()
            configs[file_name]['code_url'] = code_url
            # Skip the line that separates one record from the next.
            cache.readline()
# Collect the tracked files in sorted order so that page ids are assigned
# deterministically from one run to the next.
file_list = get_list_all_files_name(NOTEBOOK_DIR)
file_list.sort()
configs['file_list'] = file_list

code_url_records = []
for page_id, f in enumerate(file_list, start=1):
    # Decide the file type from the actual suffix rather than from a
    # substring match, so a name such as "report.pdf.ipynb" is still treated
    # as a notebook.
    pdf = f.endswith(".pdf")
    if pdf:
        page_name = f[:-len(".pdf")]
    elif f.endswith(".ipynb"):
        page_name = f[:-len(".ipynb")]
    else:
        page_name = f

    code_url = ""
    # A directory named after the page in the working directory is linked
    # as the page's code on GitHub.
    if os.path.exists(os.path.join(os.getcwd(), page_name)):
        code_url = "https://github.com/as641651/" + MODULE_NAME + "/tree/master/" + page_name
    # NOTE(review): the original indentation was lost; this override is
    # assumed to apply to every page, not only to pages that have their own
    # directory. Confirm against the upstream file.
    if COMMON_CODE_URL != "":
        code_url = COMMON_CODE_URL

    configs[f] = {
        "code_url": code_url,
        "page_name": page_name,
        "pdf": pdf,
        "page_id": MODULE_ID + page_id,
    }
    # One record per file: name line, URL line, blank separator line.
    code_url_records.append(f + "\n" + code_url + "\n\n")
code_urls_txt = "".join(code_url_records)

# Either overlay the URLs previously saved in code_urls.txt, or overwrite
# that file with the URLs computed above.
if CODE_URLS_REUSE:
    parse_code_urls()
else:
    with open('code_urls.txt', 'w') as urls_file:
        urls_file.write(code_urls_txt)

with open('configs.json', 'w') as outfile:
    json.dump(configs, outfile, indent=4, sort_keys=True)

print(configs)