-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit.py
46 lines (41 loc) · 1.79 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python
# Taken from http://tex.stackexchange.com/questions/24397/extract-document-statistics-how-many-pages-has-chapter-xy-count-fixmes/24480#24480
import os
import re
import subprocess
# Offset added to every TOC page number to obtain the page index inside the
# PDF (the name is kept as in the original script).
front_matter = 13

# One TOC entry: \contentsline {type}{title}{page} plus an optional fourth
# brace group. A trailing '%' is tolerated too: some LaTeX versions end each
# .toc entry with one, and without this those entries were silently skipped.
_TOC_ENTRY = re.compile(
    r'^\\contentsline\W*{(section|part|chapter)}{(.*)}{([0-9]+)}({[^}]*})?%?$')

# Numbered title: \numberline {N}Title. Unnumbered chapters do not match.
_NUMBERLINE = re.compile(r'^\\numberline\W*{([0-9]+)}(.*)$')


def clean_chapter_name(raw):
    """Reduce a raw TOC chapter title to a lowercase file-name stem.

    Footnote, accent and \\emph markup is removed, only the part of the
    title before the first ':', '\\&' or '(' is kept, and every run of
    non-letters is collapsed into a single underscore.
    """
    raw = re.sub(r'\\FN@sf@gobble@opt .*$', '', raw)  # strip footnote
    raw = re.sub(r'\\IeC\W*{.*?([a-zA-Z]) ?}', r'\1', raw)  # remove accents
    raw = re.sub(r'\\emph\W*{(.*?)}', r'\1', raw)  # remove \emph
    raw = re.sub(r'(:|\W*\\&|\W*\().*$', '', raw)  # keep just the first part
    raw = re.sub(r' a ', r' ', raw)  # remove 'a' as conjunction
    raw = re.sub(r'[^a-zA-Z]+', '_', raw)  # non-letters become underscores
    return raw.lower()


def parse_toc(lines):
    """Collect the numbered chapters found in an iterable of .toc lines.

    Every line that looks like a section/part/chapter entry is echoed to
    stdout; only numbered chapters are returned, in file order, as dicts
    with the keys 'page', 'name' and 'number'.
    """
    chapters = []
    for line in lines:
        entry = _TOC_ENTRY.match(line)
        if not entry:
            continue
        print('Match:', entry.group(1, 2, 3))
        kind, raw, page = entry.group(1, 2, 3)
        if kind != 'chapter':
            continue
        numbered = _NUMBERLINE.match(raw)
        if not numbered:
            # Chapter without a \numberline prefix: not split out.
            continue
        num, title = numbered.group(1, 2)
        chapters.append({
            'page': int(page),
            'name': clean_chapter_name(title),
            'number': int(num),
        })
    return chapters


def page_spec(chapters, index, offset=front_matter):
    """Return the 'start-end' page range for chapters[index].

    The range runs from the chapter's own first page up to the page before
    the next chapter starts, both shifted by *offset*. The last chapter
    gets the literal end marker 'end'.
    """
    start = chapters[index]['page'] + offset
    if index + 1 < len(chapters):
        end = chapters[index + 1]['page'] - 1 + offset
    else:
        end = 'end'
    return "{}-{}".format(start, end)


def main(toc_path='main.toc', pdf_path='pdf/skripsi.pdf', out_dir='split'):
    """Split *pdf_path* into one PDF per numbered chapter listed in *toc_path*.

    Each chapter is written to '<out_dir>/<name>.pdf' by calling the
    external 'pdf-stapler' tool. A failure on one chapter is reported on
    stdout and the remaining chapters are still processed.
    """
    with open(toc_path) as toc_file:
        chap_list = parse_toc(toc_file)

    # The external tool is given a path inside out_dir, so make sure it exists.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for index, chapter in enumerate(chap_list):
        print(chapter)
        spec = page_spec(chap_list, index)
        print(spec)
        out = "{}/{}.pdf".format(out_dir, chapter['name'])
        # Argument list instead of a shell string: no quoting problems, and
        # a missing executable surfaces as OSError instead of being ignored.
        try:
            status = subprocess.call(
                ['pdf-stapler', 'cat', pdf_path, spec, out])
        except OSError as e:
            print(e)
            continue
        if status != 0:
            print('pdf-stapler exited with status {} for {}'.format(
                status, out))


if __name__ == '__main__':
    main()