-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcodeblocks.py
124 lines (113 loc) · 4.7 KB
/
codeblocks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
from enum import Enum
from itertools import chain
from tempfile import NamedTemporaryFile
import argparse
import glob
import os
import os.path
import re
import shutil
import sys
class Codeblock(Enum):
    """Scanner position relative to a fenced Markdown code block."""

    # Inside a block whose opening fence line consists only of backticks.
    IN = 1
    # Inside a block whose opening fence line has text besides the backticks.
    IN_WITH_LANG = 2
    # Not inside any fenced block.
    OUT = 3
# Matches text that begins with three or more backticks, continues with at
# least one non-backtick character and then another backtick, i.e. a run of
# backticks that is closed again on the same line rather than a fence opener.
INTLINE_PATTERN = re.compile(r"```+[^`]+`")

# Language name used when the detector cannot classify a snippet.
DEFAULT_LANG = "txt"
# Pick a language-detection backend at import time: Magika is preferred,
# Guesslang is the fallback, and the script exits if neither can be imported.
try:
    from magika import Magika

    magika = Magika()

    def guess_language(code):
        """Return Magika's label for *code*, or DEFAULT_LANG if it is "unknown"."""
        result = magika.identify_bytes(code.encode(encoding="utf-8"))
        label = result.prediction.output.label
        if label == "unknown":
            return DEFAULT_LANG
        return label

except ImportError:
    try:
        from guesslang import Guess

        guesslang = Guess()

        def guess_language(code):
            """Return Guesslang's lower-cased language name, or DEFAULT_LANG if none."""
            name = guesslang.language_name(code)
            if not name:
                return DEFAULT_LANG
            return name.lower()

    except ImportError:
        print("Magika or Guesslang is required to run this script. Install one of them to proceed!")
        sys.exit(1)
def add_language(files, edit_files):
    """Detect the language of untagged fenced code blocks in Markdown files.

    Each file is scanned line by line. For every backtick fence that is opened
    without any text after the backticks, the enclosed code is passed to
    ``guess_language``.

    Args:
        files: Iterable of paths of the files to scan.
        edit_files: When true, each file is rewritten in place with the guessed
            language appended to the opening fence (a guess of "txt" is not
            inserted). When false, nothing is modified; instead the location
            ``file:line`` of the opening fence and the block with the suggested
            language are printed.
    """

    def is_made_of_char(text, char):
        """Return True when every character of *text* is *char*."""
        return len(text) == text.count(char)

    # newline="" is important here and in open() below: it disables newline
    # translation, so the line endings of an edited file are left unchanged.
    temp = NamedTemporaryFile("w+", encoding="utf-8", newline="", delete=False)
    try:
        for file in files:
            blockstate = Codeblock.OUT
            code = []
            # Properties of the currently open fence. They are only read while
            # blockstate is not OUT, i.e. after an opening fence has set them.
            backticks_num = 0
            indent = ""
            fence = ""
            with open(file, encoding="utf-8", newline="") as f:
                for linenum, line in enumerate(f, 1):
                    stripped = line.strip()
                    # A fence line starts with ``` and is not a backtick run
                    # closed on the same line. Inside a block only a pure
                    # backtick line at least as long as the opener closes it.
                    is_fence = (
                        stripped.startswith("```")
                        and not INTLINE_PATTERN.match(stripped)
                        and (
                            blockstate == Codeblock.OUT
                            or (
                                is_made_of_char(stripped, "`")
                                and len(stripped) >= backticks_num
                            )
                        )
                    )

                    if not is_fence:
                        if blockstate == Codeblock.IN:
                            # Held back until the closing fence so the opener
                            # can be rewritten first.
                            code.append(line)
                        elif edit_files:
                            temp.write(line)
                        continue

                    if blockstate == Codeblock.IN_WITH_LANG:
                        # Closing fence of a block that already has a language.
                        blockstate = Codeblock.OUT
                        if edit_files:
                            temp.write(line)
                    elif blockstate == Codeblock.IN:
                        # Closing fence of an untagged block: guess and emit.
                        blockstate = Codeblock.OUT
                        code_str = "\n".join(
                            code_line.removeprefix(indent).rstrip()
                            for code_line in code
                        ) + "\n"
                        # code_str always ends with "\n", so test for real
                        # content instead of truthiness; blank blocks get no tag.
                        lang = guess_language(code_str) if code_str.strip() else ""
                        fence_start = "`" * backticks_num
                        if edit_files:
                            # When editing files, txt is not a very useful edit.
                            if lang == "txt":
                                lang = ""
                            temp.write(fence.replace(fence_start, fence_start + lang))
                            temp.writelines(code)
                            temp.write(line)
                        elif lang:
                            print(f"{file}:{linenum - len(code) - 1}")
                            print(fence_start + lang + "\n" + code_str + stripped)
                            print()
                        code = []
                    elif is_made_of_char(stripped, "`"):
                        # Opening fence without a language: remember it and
                        # its indentation, and start collecting the code.
                        backticks_num = len(stripped)
                        blockstate = Codeblock.IN
                        indent = line[:len(line) - len(line.lstrip())]
                        fence = line
                    else:
                        # Opening fence that already has text after the backticks.
                        backticks_num = stripped.count("`")
                        blockstate = Codeblock.IN_WITH_LANG
                        if edit_files:
                            temp.write(line)

            if edit_files:
                if blockstate == Codeblock.IN:
                    # Non-terminated fence: write back the held opener and code
                    # unchanged. Checking the state rather than `code` keeps an
                    # opener that is the very last line of the file.
                    temp.write(fence)
                    temp.writelines(code)
                temp.flush()
                # copyfile() copies the data only. copy() would also copy the
                # temporary file's restrictive permission bits onto the target.
                shutil.copyfile(temp.name, file)
                # Reuse the same temporary file for the next input.
                temp.seek(0)
                temp.truncate(0)
    finally:
        # Remove the temporary file even if processing fails part-way.
        temp.close()
        os.unlink(temp.name)
def main():
    """Parse the command line and process every requested Markdown file.

    Each positional path may be a file, which is processed as given, or a
    directory, which is searched recursively for ``*.md`` files. If a path is
    neither, a message is printed and the script exits with status 1 before
    any file is processed.
    """
    parser = argparse.ArgumentParser(
        description="Detect and insert the language in the Markdown code blocks."
    )
    parser.add_argument("--edit", action="store_true", help="Edit files by inserting the language")
    parser.add_argument("path", nargs="+", help="Paths to process")
    options = parser.parse_args()

    explicit_files = []
    directory_globs = []
    for target in options.path:
        if os.path.isdir(target):
            markdown_glob = os.path.join(target, "**", "*.md")
            directory_globs.append(glob.iglob(markdown_glob, recursive=True))
        elif os.path.isfile(target):
            explicit_files.append(target)
        else:
            print(f"Path doesn't exist: \"{target}\"")
            sys.exit(1)

    # Directory matches are processed first, explicitly named files last.
    add_language(chain(*directory_globs, explicit_files), options.edit)


if __name__ == "__main__":
    main()