Skip to content

Commit 3757712

Browse files
author
awu42
committed
validate_rst_title_capitalization.py MomIsBestFriend edits (pandas-dev#26941)
1 parent 0e344ad commit 3757712

File tree

1 file changed

+19
-18
lines changed

1 file changed

+19
-18
lines changed

scripts/validate_rst_title_capitalization.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
err_msg = "Heading capitalization formatted incorrectly. Please correctly capitalize"
6666

6767

68-
def follow_capitalization_convention(title: str) -> bool:
68+
def is_following_capitalization_convention(title: str) -> bool:
6969
"""
7070
Algorithm to determine if a heading follows the capitalization convention
7171
@@ -84,7 +84,10 @@ def follow_capitalization_convention(title: str) -> bool:
8484
8585
"""
8686

87-
# split with delimiters comma, semicolon and space, parentheses, colon, slashes
87+
# Remove https link if present in heading
88+
title = re.sub(r"<https?:\/\/.*[\r\n]*>", "", title)
89+
90+
# Split with delimiters comma, semicolon and space, parentheses, colon, slashes
8891
word_list = re.split(r"[;,-/():\s]\s*", title)
8992

9093
# Edge Case: First word is an empty string
@@ -148,10 +151,9 @@ def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
148151
# List of line numbers at which the corresponding headings in title_list can be found
149152
line_number_list: List[int] = []
150153

151-
# Open and read the .rst file and store the string of data into input
152-
f = open(rst_file, "r")
153-
input = f.read().split("\n")
154-
f.close()
154+
# Open and read the .rst file and store the string of data into lines
155+
with open(rst_file, "r") as file_obj:
156+
lines = file_obj.read().split("\n")
155157

156158
# Regular expressions that denote a title beforehand
157159
regex = {
@@ -167,20 +169,20 @@ def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
167169
# '*`_' markers are removed from original string text.
168170
table = str.maketrans("", "", "*`_")
169171

170-
# Loop through input lines, appending if they are considered headings
171-
for lineno in range(1, len(input)):
172-
if len(input[lineno]) != 0 and len(input[lineno - 1]) != 0:
172+
# Loop through lines, appending if they are considered headings
173+
for lineno in range(1, len(lines)):
174+
if len(lines[lineno]) != 0 and len(lines[lineno - 1]) != 0:
173175
for key in regex:
174-
match = re.search(regex[key], input[lineno])
176+
match = re.search(regex[key], lines[lineno])
175177
if match is not None:
176178
if lineno >= 2:
177-
if input[lineno] == input[lineno - 2]:
178-
if len(input[lineno]) == len(input[lineno - 1]):
179-
title_list.append(input[lineno - 1].translate(table))
179+
if lines[lineno] == lines[lineno - 2]:
180+
if len(lines[lineno]) == len(lines[lineno - 1]):
181+
title_list.append(lines[lineno - 1].translate(table))
180182
line_number_list.append(lineno)
181183
break
182-
if len(input[lineno]) >= len(input[lineno - 1]):
183-
title_list.append(input[lineno - 1].translate(table))
184+
if len(lines[lineno]) >= len(lines[lineno - 1]):
185+
title_list.append(lines[lineno - 1].translate(table))
184186
line_number_list.append(lineno)
185187

186188
return title_list, line_number_list
@@ -206,14 +208,14 @@ def fill_bad_title_dict(rst_file: str) -> None:
206208

207209
# Append to the bad_title_dict if the capitalization convention is not followed
208210
for i in range(len(title_list)):
209-
if not follow_capitalization_convention(title_list[i]):
211+
if not is_following_capitalization_convention(title_list[i]):
210212
if rst_file not in bad_title_dict:
211213
bad_title_dict[rst_file] = [(title_list[i], line_number_list[i])]
212214
else:
213215
bad_title_dict[rst_file].append((title_list[i], line_number_list[i]))
214216

215217

216-
def find_rst_files(source_paths: List[str]) -> List[str]:
218+
def find_rst_files(source_paths: List[str]) -> Generator[str, None, None]:
217219
"""
218220
Given the command line arguments of directory paths, this method
219221
yields the strings of the .rst file directories that these paths contain
@@ -269,7 +271,6 @@ def main(source_paths: List[str], output_format: str) -> bool:
269271
directory_list = find_rst_files(source_paths)
270272

271273
# Fill the bad_title_dict, which contains all incorrectly capitalized headings
272-
# with suppress_stdout_stderr():
273274
for filename in directory_list:
274275
fill_bad_title_dict(filename)
275276

0 commit comments

Comments
 (0)