-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathepub_maker.py
182 lines (147 loc) · 6.53 KB
/
epub_maker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import os
import re
import shutil
import zipfile
import random
import requests
import requests_cache
requests_cache.install_cache('img_cache')
def rewrite_file(file_path):
f = open(file_path, 'r', encoding='utf-8')
file_content = f.read()
f.close()
f = open(file_path, 'w', encoding='utf-8')
return file_content, f
def copy_file(src_path, dst_path):
with open(src_path, 'r', encoding='utf-8') as f:
content = f.read()
return content, open(dst_path, 'w', encoding='utf-8')
def get_random_hex(length):
hex_str = '0123456789abcdef'
return ''.join([random.choice(hex_str) for _ in range(length)])
src_re = re.compile(r'src="([^"]+)"')
def get_src_from_text(text):
src = src_re.findall(text)
return src[0] if src else None
play_order_re = re.compile(r'playOrder="([^"]+)"')
def get_all_play_order(text):
return play_order_re.findall(text)
class EpubMaker:
work_dir = ''
template_dir = 'EpubTemplate/'
def __init__(self, work_dir, title, author, push_date=None, source=None, enable_download_img=True):
self.work_dir = work_dir
self.title = title
self.author = author
self.push_date = push_date
self.source = source
self.play_order = 0
self.enable_download_img = enable_download_img
self.make_work_dir()
def make_work_dir(self):
if os.path.exists(self.work_dir):
self.init_play_order()
return
shutil.copytree(self.template_dir, self.work_dir)
self.init_coverpage()
self.init_fb_ncx()
self.init_fb_opf()
self.init_play_order()
def zip_dir_to_epub(self, path, file_name):
zip_handler = zipfile.ZipFile('result/{name}.epub'.format(name=file_name), 'w', zipfile.ZIP_DEFLATED)
for root, dirs, files in os.walk(path):
for file in files:
zip_handler.write(os.path.join(root, file), os.path.join(root.replace(self.work_dir, ''), file))
def init_coverpage(self):
file_content, f = rewrite_file(self.work_dir + 'OPS/coverpage.html')
file_content = file_content.replace('{Title}', self.title)
file_content = file_content.replace('{Author}', '作者:%s' % self.author)
file_content = file_content.replace('{PushTime}', '发表时间:%s' % self.push_date)
file_content = file_content.replace('{Source}', self.source)
f.write(file_content)
f.close()
def init_fb_ncx(self):
fc, f = rewrite_file(self.work_dir + 'OPS/fb.ncx')
# 51037e82-03ff-11dd-9fbb-0018f369440e
uid = '%s-%s-%s-%s-%s' % \
(get_random_hex(8), get_random_hex(4), get_random_hex(4), get_random_hex(4), get_random_hex(12))
fc = fc.replace('{Uid}', uid)
fc = fc.replace('{Title}', self.title)
fc = fc.replace('{Author}', self.author)
f.write(fc)
f.close()
def init_fb_opf(self):
fc, f = rewrite_file(self.work_dir + 'OPS/fb.opf')
fc = fc.replace('{Title}', self.title)
fc = fc.replace('{Author}', self.author)
fc = fc.replace('{PushDate}', self.push_date)
fc = fc.replace('{Source}', self.source)
f.write(fc)
f.close()
def init_play_order(self):
with open(self.work_dir + 'OPS/fb.ncx', encoding='utf-8') as f:
content = f.read()
po_list = get_all_play_order(content)
self.play_order = max([int(po) for po in po_list])
def add_chapter(self, chapter_name, content, file_name=None):
if not file_name:
file_name = self.play_order
is_need_update_index = \
not os.path.exists(self.work_dir + 'OPS/{file_name}.html'.format(file_name=file_name))
fc, f = copy_file(self.template_dir + 'OPS/chapter.html',
self.work_dir + 'OPS/{file_name}.html'.format(file_name=file_name))
body_html = ''
p_content = ''
lines = content.split('\n')
for line in lines:
if self.enable_download_img and line.startswith('<img'):
src = get_src_from_text(line)
img_src = self.download_img(src)
p_content += '<img src="{img_src}" class="manga">'.format(img_src=img_src)
elif line.startswith('<br'):
body_html += '<p>{content}</p>\n'.format(content=p_content)
p_content = ''
else:
p_content += line
if len(p_content) > 0:
body_html += '<p>{content}</p>\n'.format(content=p_content)
fc = fc.replace('{Title}', chapter_name)
fc = fc.replace('<!-- title -->', '<h2>{title}</h2>'.format(title=chapter_name))
fc = fc.replace('{MainContent}', body_html)
f.write(fc)
f.close()
if is_need_update_index:
self.add_index(chapter_name, file_name)
def add_index(self, chapter_name, file_name):
nav_point_html = '''
<navPoint id="chapter{play_order}" playOrder="{play_order}">
<navLabel><text>{chapter_name}</text></navLabel>
<content src="{file_name}.html"/>
</navPoint>\n\n
<!--ANOTHER NAVPOINT-->\n
'''.format(play_order=self.play_order, file_name=file_name, chapter_name=chapter_name)
fc, f = rewrite_file(self.work_dir + 'OPS/fb.ncx')
f.write(fc.replace('<!--ANOTHER NAVPOINT-->', nav_point_html))
f.close()
item_html = """
<item id="chapter{play_order}" href="{file_name}.html" media-type="application/xhtml+xml"/>\n\n<!-- ANOTHER ITEM -->
""".format(play_order=self.play_order, file_name=file_name)
itemref_html = """
<itemref idref="chapter{play_order}" linear="yes"/>\n\n<!-- ANOTHER ITEMREF -->
""".format(play_order=self.play_order)
fc, f = rewrite_file(self.work_dir + 'OPS/fb.opf')
fc = fc.replace('<!-- ANOTHER ITEM -->', item_html)
fc = fc.replace('<!-- ANOTHER ITEMREF -->', itemref_html)
f.write(fc)
f.close()
self.play_order += 1
def download_img(self, img_url):
p = requests.get(img_url)
img_name, img_type = re.findall(r'(\w+)\.(jpg|png|gif|jpeg)', img_url)[0]
img_path = self.work_dir + 'OPS/images/' + img_name + '.' + img_type
with open(img_path, 'wb') as f:
f.write(p.content)
return 'images/' + img_name + '.' + img_type
def make_epub_file(self, file_name=None):
epub_name = file_name if file_name else '{author} - {title}'.format(author=self.author, title=self.title)
self.zip_dir_to_epub(self.work_dir, epub_name)