-
Notifications
You must be signed in to change notification settings - Fork 0
/
conmd.py
executable file
·60 lines (59 loc) · 1.79 KB
/
conmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python
import argparse
import os
from pathlib import Path
import re
import subprocess
def glob_pattern(include_subdirs):
    """Return the ``Path.glob`` pattern used to find Word documents.

    Args:
        include_subdirs: Value of the ``-s`` option. ``'N'`` limits the
            search to the top-level directory; any other value recurses.

    Returns:
        A relative glob pattern. It must not start with ``/`` because
        ``Path.glob`` rejects non-relative patterns.
    """
    return '*.docx' if include_subdirs == 'N' else '**/*.docx'


def markdown_target(docx_file, docx_root, md_root):
    """Map a Word document under *docx_root* to its Markdown path under *md_root*.

    The directory layout below *docx_root* is mirrored below *md_root* and
    the file keeps its stem with a ``.md`` suffix.

    Args:
        docx_file: Path of the Word document (located under *docx_root*).
        docx_root: Directory that was searched for Word documents.
        md_root: Directory that receives the Markdown files.

    Returns:
        The ``Path`` of the Markdown file to create.

    Raises:
        ValueError: If *docx_file* is not located under *docx_root*.
    """
    docx_file = Path(docx_file)
    # relative_to() is immune to trailing slashes and other spelling
    # differences that break plain string slicing of the two paths.
    relative_dir = docx_file.parent.relative_to(docx_root)
    return Path(md_root) / relative_dir / f'{docx_file.stem}.md'


def convert_document(docx_file, md_file):
    """Run pandoc to convert one Word document to GitHub-flavoured Markdown.

    Args:
        docx_file: Path of the source Word document.
        md_file: Path of the Markdown file to write.

    Returns:
        The ``subprocess.CompletedProcess`` of the pandoc invocation, with
        stdout/stderr captured as text.
    """
    # NOTE(review): --extract-media=. writes media relative to the current
    # working directory, not next to the Markdown file; media from different
    # documents may end up in the same folder -- confirm this is intended.
    return subprocess.run(
        [
            'pandoc',
            '-f',
            'docx',
            '-t',
            'gfm',
            '--extract-media=.',
            str(docx_file),
            '-o',
            str(md_file),
        ],
        capture_output=True,
        text=True,
    )


def main(argv=None):
    """Convert every Word document found under ``docx_path`` to Markdown.

    Args:
        argv: Command-line arguments without the program name; ``None``
            means ``sys.argv[1:]`` (argparse default).
    """
    parser = argparse.ArgumentParser(description='Convert Word documents to Markdown files.')
    parser.add_argument(
        'docx_path',
        help='Absolute directory path containing the Word documents'
    )
    parser.add_argument(
        'md_path',
        help='Absolute directory path in which create Markdown files'
    )
    parser.add_argument(
        '-s',
        help='Include subdirectories (Y by default)',
        default='Y',
        choices=('Y', 'N')
    )
    args = parser.parse_args(argv)

    docx_root = Path(args.docx_path)
    md_root = Path(args.md_path)
    conv_errors = 0
    conv_done = 0
    for docx_file in docx_root.glob(glob_pattern(args.s)):
        print(f'Processing {docx_file}')
        md_file = markdown_target(docx_file, docx_root, md_root)
        # Always create the destination directory, including the output
        # root itself for documents that sit directly in docx_path.
        md_file.parent.mkdir(parents=True, exist_ok=True)
        result = convert_document(docx_file, md_file)
        # Judge success by exit status: pandoc may print warnings on stderr
        # even when the conversion succeeded.
        if result.returncode == 0:
            conv_done += 1
            print(f'Successfully created {md_file}')
            if result.stderr:
                print(f'Warnings while processing {docx_file}: {result.stderr}')
        else:
            conv_errors += 1
            print(f'Error processing {docx_file} as follows {result.stderr}')
    processed = conv_done + conv_errors
    print(f'Finished processing {processed} Word documents (converted:{conv_done}; errors:{conv_errors})')


if __name__ == '__main__':
    main()