-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
166 lines (144 loc) · 5.45 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import argparse
import re
import shutil
import subprocess
from os.path import abspath
import chm
from lxml import etree, html
from path import path as Path
# Input tree: main() treats every sub-directory of SOURCE as one
# documentation module. The relative path is made absolute against the
# working directory at import time; the backslash separator is Windows-style.
SOURCE = Path('complete\\source').abspath()
# Output tree: processed HTML and copied resources are written here, and every
# path registered with CHM is expressed relative to this directory.
OUTPUT = Path('complete\\output').abspath()
# Project object that accumulates the file list (CHM.append), the contents
# tree (CHM.toc) and the keyword index (CHM.index) before CHM.save() is called.
CHM = chm.DocChm('Qt-5.7.0', default_topic='qtdoc/index.html', title='Qt 5.7.0')
def can_skip_section(section):
    """Tell whether ``section`` has no sub-sections worth descending into.

    Returns True when every direct ``<section>`` child carries one of the
    boilerplate titles (or when there are no such children at all), and
    False as soon as any child has a different or missing title.
    """
    boilerplate = ('List of all members', 'Obsolete members')
    return all(
        child.get('title') in boilerplate
        for child in section.findall('section')
    )
def _node_text(node):
    """Return the text content of an lxml element, markup and outer whitespace removed."""
    return html.tostring(node, encoding='unicode', method='text').strip()


def parse_file_toc(file, parent):
    """Add the in-page table of contents of an HTML file below ``parent``.

    Every ``<div class="toc">`` in the page is walked; each ``ul/li`` entry
    becomes a child created with ``append(title, href)``. The nesting level
    is taken from the last character of the ``<li>`` class attribute.
    All hrefs are stored relative to ``OUTPUT``.

    The page ``qtexamplesandtutorials.html`` gets extra treatment: each
    ``doc-column`` of its ``multi-column`` blocks becomes a heading entry
    (pointing at the page itself) with the column's links below it.

    Args:
        file: path object of the HTML page; nothing happens if it does not exist.
        parent: contents node the new entries are appended to.
    """
    if not file.exists():
        return
    with open(file, encoding='utf-8') as f:
        tree = html.parse(f)

    page_dir = file.dirname()
    page_name = file.basename()

    for toc in tree.xpath('//div[@class="toc"]'):
        prev_level = None
        prev_item = None
        # stack[-1] is the node new entries are attached to; stack[0] is
        # always ``parent`` and must never be removed.
        stack = [parent]
        for li in toc.findall('ul/li'):
            level = int(li.get('class')[-1])
            # some HTML have 2 as the first level
            if prev_level is None:
                prev_level = level
            a = li.find('a')
            title = _node_text(a)
            href = a.get('href')
            if href.startswith('#'):
                # Pure anchor: make it a link into this very page.
                href = page_name + href
            href = OUTPUT.relpathto(page_dir / href)
            if level > prev_level:
                stack.append(prev_item)
            elif level < prev_level:
                # Unwind one entry per level, but keep the root: a page whose
                # levels drop below the first one seen (or that skipped a
                # level on the way down) would otherwise empty the stack and
                # fail on stack[-1].
                keep = max(1, len(stack) - (prev_level - level))
                del stack[keep:]
            item = stack[-1].append(title, href)
            prev_level = level
            prev_item = item

    if page_name == 'qtexamplesandtutorials.html':
        for multicolumn in tree.xpath('//div[@class="multi-column"]'):
            for doccolumn in multicolumn.findall('div[@class="doc-column"]'):
                # The column's first paragraph serves as its heading.
                title = _node_text(doccolumn.find('p'))
                doccolumn_toc = parent.append(title, OUTPUT.relpathto(file))
                for li in doccolumn.findall('ul/li'):
                    a = li.find('a')
                    href = a.get('href')
                    # Links to the online documentation are left out.
                    if href.startswith('http://doc.qt.io'):
                        continue
                    title = _node_text(a)
                    doccolumn_toc.append(title, OUTPUT.relpathto(page_dir / href))
def process_section(elem, parent, module):
    """Recursively copy the ``<section>`` children of ``elem`` into the contents tree.

    A titled section becomes a new node under ``parent`` (linked to
    ``<module name>/<ref>``). A section with an empty title adds no node of
    its own; its sub-sections are attached directly to ``parent``.

    When a section has only boilerplate sub-sections (see
    ``can_skip_section``) and its link has no ``#`` fragment, the headings
    of the linked HTML page are added instead via ``parse_file_toc``.

    Args:
        elem: XML element whose direct ``<section>`` children are processed.
        parent: contents node that receives the new entries.
        module: path object of the module directory; its basename prefixes
            every link.
    """
    for section in elem.findall('section'):
        title = section.get('title').strip()
        # title could be empty
        if title:
            href = module.basename() / section.get('ref')
            child_toc = parent.append(title, href)
        else:
            # No node is created for an untitled section, so it has no link
            # of its own. Reset href explicitly: otherwise the check below
            # would hit an unbound name or reuse the previous sibling's link.
            href = None
            child_toc = parent
        if not can_skip_section(section):
            process_section(section, child_toc, module)
        elif href is not None and '#' not in href:
            parse_file_toc(OUTPUT / href, child_toc)
def process_qhp(file, module):
    """Feed the contents tree and keyword index of one ``.qhp`` file into ``CHM``.

    The first ``<toc>`` element is handed to ``process_section`` with
    ``CHM.toc`` as the root. Every ``<keyword>`` of the first ``<keywords>``
    element is appended to ``CHM.index``, with the raw ``ref`` attribute
    passed as the entry's third argument.

    Args:
        file: path of the ``.qhp`` XML file.
        module: path object of the module directory; its basename prefixes
            every link.
    """
    # etree.parse() reads the file by path itself; no separate open() needed.
    tree = etree.parse(file)

    toc = tree.xpath('//toc')
    if toc:
        process_section(toc[0], CHM.toc, module)

    keywords = tree.xpath('//keywords')
    index = CHM.index
    if keywords:
        for keyword in keywords[0].findall('keyword'):
            name = keyword.get('name')
            # too many topics: skip names such as "operator==" that start
            # with "operator" and contain no space
            if name.startswith('operator') and ' ' not in name:
                continue
            ref = keyword.get('ref')
            index.append(name, module.basename() / ref, ref)
def process_resource(dir, output_dir):
    """Mirror one resource directory into the output tree and register its files.

    The directory named ``style`` is ignored. Any other directory is copied
    to ``output_dir`` under the same name unless that copy already exists;
    afterwards every file directly inside the copy is appended to ``CHM``
    with a path relative to ``OUTPUT``.

    Args:
        dir: path object of the source resource directory.
        output_dir: path object of the module's output directory.
    """
    folder_name = dir.basename()
    if folder_name == 'style':
        return

    target = output_dir / folder_name
    already_copied = target.exists()
    if not already_copied:
        print('Copying', dir)
        shutil.copytree(dir, target)

    for copied in target.files():
        CHM.append(OUTPUT.relpathto(copied))
# Matches from the first "<link" up to the nearest following "</script>",
# across line breaks (re.S lets "." match newlines).
style_re = re.compile(r'<link.*?</script>', re.S)


def process_html(file, output_dir):
    """Write a cleaned copy of an HTML page to ``output_dir`` and register it.

    The page is rewritten only if the target does not exist yet or the
    module-level ``args.force`` flag is set. The target is appended to
    ``CHM`` (relative to ``OUTPUT``) in either case.

    Args:
        file: path object of the source HTML page.
        output_dir: path object of the directory receiving the copy.
    """
    target = output_dir / file.basename()
    if not target.exists() or args.force:
        print('Processing', file)
        # Read and transform before opening the target: if reading or
        # decoding fails, no empty target is left behind that a later run
        # would mistake for an already processed page.
        with open(file, encoding='utf-8') as r:
            content = r.read()
        # remove stylesheet set via javascript
        content = style_re.sub(
            '<link rel="stylesheet" type="text/css" href="../style.css" />',
            content,
            count=1,
        )
        # remove empty paragraph after navigation button
        content = content.replace('</p><p/>', '</p>')
        with open(target, 'w', encoding='utf-8') as w:
            w.write(content)
    CHM.append(OUTPUT.relpathto(target))
def process_module(module):
    """Convert one documentation module directory.

    Creates the module's output directory, processes every ``.html`` file
    found directly in ``module``, then processes the module's ``.qhp`` file
    (the last one encountered, if any), and finally handles each
    sub-directory as a resource directory.

    Args:
        module: path object of the module's source directory.
    """
    print(module)
    output_dir = OUTPUT / module.basename()
    output_dir.mkdir_p()

    qhp = None
    for entry in module.files():
        suffix = entry.ext
        if suffix == '.qhp':
            # Remember it; it is processed once all HTML pages are done.
            qhp = entry
        elif suffix == '.html':
            process_html(entry, output_dir)

    if qhp:
        process_qhp(qhp, module)

    for subdir in module.dirs():
        process_resource(subdir, output_dir)
def main():
    """Process every module directory below ``SOURCE``, with ``qtdoc`` first."""
    # put qtdoc first
    leading = SOURCE / 'qtdoc'
    process_module(leading)

    remaining = (m for m in SOURCE.dirs() if m.basename() != 'qtdoc')
    for module in remaining:
        process_module(module)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--force', action='store_true', help='Force process HTML')
    # Module-level on purpose: process_html() reads ``args.force``.
    args = parser.parse_args()

    OUTPUT.mkdir_p()
    main()

    # Make the shared stylesheet available inside the output tree.
    ostyle = OUTPUT / 'style.css'
    if not ostyle.exists():
        # mklink is a cmd.exe built-in, and cmd.exe only executes a command
        # passed after /c; without the switch the link is never created.
        status = subprocess.call(['cmd.exe', '/c', 'mklink', ostyle, abspath('style.css')])
        if status != 0:
            print('Warning: mklink exited with status', status)
    CHM.append('style.css')

    # NOTE(review): presumably entering the path object switches the working
    # directory to OUTPUT for the duration of save() - confirm against path.py.
    with OUTPUT:
        CHM.save()