forked from PaddlePaddle/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_api_label_cn.py
138 lines (120 loc) · 3.97 KB
/
check_api_label_cn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import argparse
import logging
import os
import re
import sys
from pathlib import Path
# Configure the root logger at import time.
logger = logging.getLogger()
# Guarantee that at least one handler is attached, then configure whichever
# handler comes first: when handlers already exist, the first one is assumed
# to be the one we want to format.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
console = logger.handlers[0]
_LOG_FORMAT = (
    "%(asctime)s - %(funcName)s:%(lineno)d - %(levelname)s - %(message)s"
)
console.setFormatter(logging.Formatter(_LOG_FORMAT))
logger.setLevel(logging.INFO)
# Verify that a doc file starts with the api_label derived from its path.
def check_api_label(rootdir, file):
    """Return True when the first line of ``rootdir/file`` is the expected label.

    The first line is read as UTF-8, stripped of surrounding whitespace, and
    compared with the text produced by ``generate_en_label_by_path(file)``.
    """
    target = Path(rootdir) / file
    with target.open("r", encoding="utf-8") as doc:
        header = doc.readline().strip()
    expected = generate_en_label_by_path(file)
    return header == expected
# Map a doc path to the label line expected on the file's first line.
def generate_en_label_by_path(file):
    """Return the ``.. _cn_<name>:`` label line expected for ``file``.

    ``<name>`` is ``file`` with a trailing ``_cn.rst`` removed and its path
    components joined by underscores.
    """
    stem = file.removesuffix("_cn.rst")
    joined = "_".join(Path(stem).parts)
    return ".. _cn_" + joined + ":"
# Walk the API directory and gather the api_labels of every checkable doc.
def find_all_api_labels_in_dir(rootdir):
    """Return a list of all labels found in checkable files under ``rootdir + API``.

    ``API`` is the module-level prefix assigned in the ``__main__`` section; it
    is concatenated directly onto ``rootdir`` to form the directory to walk.
    Only files for which ``should_test`` returns True are scanned.
    """
    collected = []
    for current_dir, _subdirs, filenames in os.walk(rootdir + API):
        for filename in filenames:
            full_path = Path(current_dir) / filename
            # should_test is given the path with the rootdir prefix stripped.
            relative = str(full_path).removeprefix(rootdir)
            if should_test(relative):
                collected.extend(find_api_labels_in_one_file(full_path))
    return collected
# Start of an RST hyperlink target: a literal "..", one or more spaces, then
# "_name". The dots are escaped so that arbitrary text such as "xy _foo" is
# not mistaken for a label definition.
_API_LABEL_RE = re.compile(r"\.\. +_([a-zA-Z0-9_]+)")


# api_labels in a file
def find_api_labels_in_one_file(file_path):
    """Return the label names defined in ``file_path``, in file order.

    Each line is searched for a ``.. _name`` target; at most one label is
    taken per line, and only the leading run of ``[a-zA-Z0-9_]`` characters
    after the underscore is captured.

    Args:
        file_path: path of the UTF-8 encoded file to scan.

    Returns:
        A list of label names (without the leading underscore or trailing
        colon); empty when the file defines no labels.
    """
    api_labels_in_one_file = []
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate lazily instead of loading every line up front.
        for line in f:
            found = _API_LABEL_RE.search(line)
            if found:
                api_labels_in_one_file.append(found.group(1))
    return api_labels_in_one_file
# Decide whether a doc path is an API doc whose label must be checked.
def should_test(file):
    """Return True for ``*_cn.rst`` files under the ``API`` prefix.

    Files named ``Overview_cn.rst`` or ``index_cn.rst`` are excluded. ``API``
    is the module-level prefix assigned in the ``__main__`` section.
    """
    if not file.endswith("_cn.rst"):
        return False
    if Path(file).name in ("Overview_cn.rst", "index_cn.rst"):
        return False
    return file.startswith(API)
def run_cn_api_label_checking(rootdir, files):
    """Check first-line labels and ``:ref:`` targets of the given doc files.

    Two passes are made over ``files``:

    1. Every file accepted by ``should_test`` must start with the label that
       ``check_api_label`` expects for its path.
    2. In every ``.rst`` file, each ``:ref:`` target that starts with
       ``cn_api_paddle`` must be one of the labels returned by
       ``find_all_api_labels_in_dir(rootdir)``.

    Args:
        rootdir: docs root directory; entries of ``files`` are joined onto it.
        files: sequence of file paths relative to ``rootdir`` (it is iterated
            twice, so a one-shot iterator would skip the second pass).

    The first failure is logged and the process exits with status 1 via
    ``sys.exit``. On success a confirmation line is printed.
    """
    for file in files:
        if should_test(file) and not check_api_label(rootdir, file):
            logger.error(
                f"The first line in {rootdir}/{file} is not available, please re-check it!"
            )
            sys.exit(1)

    # A set gives O(1) membership tests; the lookup below runs once per
    # reference in every checked file.
    valid_api_labels = set(find_all_api_labels_in_dir(rootdir))
    # Compiled once instead of being re-resolved for every file/reference.
    ref_re = re.compile(r":ref:`([^`]+)`")
    titled_ref_re = re.compile(r".+<(?P<api_label>.+?)>")

    for file in files:
        if not file.endswith(".rst"):
            continue
        with open(Path(rootdir) / file, "r", encoding="utf-8") as f:
            content = f.read()
        for ref_target in ref_re.findall(content):
            api_label = ref_target
            # ":ref:`title <label>`" form: the real label is inside <...>.
            if api_label_match := titled_ref_re.match(ref_target):
                api_label = api_label_match.group("api_label")
            if (
                api_label.startswith("cn_api_paddle")
                and api_label not in valid_api_labels
            ):
                logger.error(
                    f"Found api label {api_label} in {rootdir}/{file}, but it is not a valid api label, please re-check it!"
                )
                sys.exit(1)
    print("All api_label check success in PR !")
def parse_args(argv=None):
    """
    Parse input arguments

    Args:
        argv: optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``rootdir`` and ``apiroot`` (strings)
        and ``all_git_files`` (a possibly empty list of strings).

    Raises:
        SystemExit: raised by argparse when a required positional is missing.
    """
    parser = argparse.ArgumentParser(description="cn api_label checking")
    # rootdir and apiroot are required positionals. argparse only honours
    # ``default`` on positionals declared with nargs "?" or "*", so the
    # defaults formerly passed here were never applied and are left out.
    parser.add_argument(
        "rootdir",
        help="the dir DOCROOT",
        type=str,
    )
    parser.add_argument(
        "apiroot",
        type=str,
        help="the dir APIROOT",
    )
    parser.add_argument(
        "all_git_files",
        type=str,
        nargs="*",
        help="files need to check",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    args = parse_args()
    # API is the module-level prefix read by should_test and
    # find_all_api_labels_in_dir: apiroot with a leading "<rootdir>/" removed
    # (apiroot is kept unchanged when it does not start with that prefix).
    docroot_prefix = args.rootdir + "/"
    API = args.apiroot.removeprefix(docroot_prefix)
    run_cn_api_label_checking(args.rootdir, args.all_git_files)