Skip to content

Commit bc8c281

Browse files
authored
Fix bug warning about some invalid directive arguments (#261)
1 parent 253cbc4 commit bc8c281

File tree

6 files changed

+5239
-62
lines changed

6 files changed

+5239
-62
lines changed

.gitattributes

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
* text=auto
2-
examples/a-lot-of-includes/ -diff
2+
examples/a-lot-of-includes/docs/index.md -diff

examples/a-lot-of-includes/docs/index.md

Lines changed: 5095 additions & 0 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "mkdocs-include-markdown-plugin"
3-
version = "7.1.4"
3+
version = "7.1.5"
44
description = "Mkdocs Markdown includer plugin."
55
readme = "README.md"
66
license = "Apache-2.0"

src/mkdocs_include_markdown_plugin/directive.py

Lines changed: 76 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class DirectiveBoolArgument: # noqa: D101
2424

2525

2626
if TYPE_CHECKING: # pragma: no cover
27+
from collections.abc import Iterable
2728
from typing import Callable, Literal, TypedDict
2829

2930
DirectiveBoolArgumentsDict = dict[str, DirectiveBoolArgument]
@@ -50,18 +51,18 @@ class DirectiveBoolArgument: # noqa: D101
5051
DOUBLE_QUOTED_STR_RE = r'([^"]|(?<=\\)")+'
5152
SINGLE_QUOTED_STR_RE = r"([^']|(?<=\\)')+"
5253

53-
# In the following regular expression, the substrings "$OPENING_TAG"
54-
# and "$CLOSING_TAG" will be replaced by the effective opening and
55-
# closing tags in the `on_config` plugin event.
56-
INCLUDE_TAG_RE = rf'''
57-
(?P<_includer_indent>[ \t\w\\.]*?)$OPENING_TAG
54+
# In the following regular expression, the substrings "\{%", "%\}"
55+
# will be replaced by custom opening and closing tags in the `on_config`
56+
# plugin event if required.
57+
INCLUDE_TAG_RE = r'''
58+
(?P<_includer_indent>[ \t\w\\.]*?)\{%
5859
\s*
5960
include
6061
\s+
61-
(?:"(?P<double_quoted_filename>{DOUBLE_QUOTED_STR_RE})")?(?:'(?P<single_quoted_filename>{SINGLE_QUOTED_STR_RE})')?
62+
(?:"(?P<double_quoted_filename>''' + DOUBLE_QUOTED_STR_RE + r''')")?(?:'(?P<single_quoted_filename>''' + SINGLE_QUOTED_STR_RE + r''')')?
6263
(?P<arguments>.*?)
6364
\s*
64-
$CLOSING_TAG
65+
%\}
6566
''' # noqa: E501
6667

6768
TRUE_FALSE_STR_BOOL = {
@@ -110,6 +111,7 @@ def str_arg(arg: str) -> re.Pattern[str]:
110111
'heading-offset': functools.partial(arg, 'heading-offset'),
111112
}
112113

114+
INCLUDE_MARKDOWN_DIRECTIVE_ARGS = set(ARGUMENT_REGEXES)
113115
INCLUDE_DIRECTIVE_ARGS = {
114116
key for key in ARGUMENT_REGEXES if key not in (
115117
'rewrite-relative-urls', 'heading-offset', 'comments',
@@ -121,6 +123,45 @@ def str_arg(arg: str) -> re.Pattern[str]:
121123
)
122124

123125

126+
def _maybe_arguments_iter(arguments_string: str) -> Iterable[str]:
127+
"""Iterate over parts of the string that look like arguments."""
128+
current_string_opening = '' # can be either `'` or `"`
129+
inside_string = False
130+
escaping = False
131+
opening_argument = False # whether we are at the beginning of an argument
132+
current_value = ''
133+
134+
for c in arguments_string:
135+
if inside_string:
136+
if c == '\\':
137+
escaping = not escaping
138+
continue
139+
elif c == current_string_opening and not escaping:
140+
inside_string = False
141+
current_string_opening = ''
142+
else:
143+
escaping = False
144+
elif c == '=':
145+
new_current_value = ''
146+
for ch in reversed(current_value):
147+
if ch in string.whitespace:
148+
current_value = new_current_value[::-1]
149+
break
150+
new_current_value += ch
151+
yield current_value
152+
current_value = ''
153+
opening_argument = True
154+
elif opening_argument:
155+
opening_argument = False
156+
if c in ('"', "'"):
157+
current_string_opening = c
158+
inside_string = True
159+
current_value += c
160+
current_value += c
161+
else:
162+
current_value += c
163+
164+
124165
def warn_invalid_directive_arguments(
125166
arguments_string: str,
126167
directive_lineno: Callable[[], int],
@@ -130,18 +171,17 @@ def warn_invalid_directive_arguments(
130171
) -> None:
131172
"""Warns about the invalid arguments passed to a directive."""
132173
valid_args = (
133-
INCLUDE_DIRECTIVE_ARGS if directive == 'include'
134-
else set(ARGUMENT_REGEXES)
174+
INCLUDE_DIRECTIVE_ARGS
175+
if directive == 'include'
176+
else INCLUDE_MARKDOWN_DIRECTIVE_ARGS
135177
)
136-
for arg_value in WARN_INVALID_DIRECTIVE_ARGS_REGEX.findall(
137-
arguments_string,
138-
):
139-
if arg_value.split('=', 1)[0] not in valid_args:
178+
for maybe_arg in _maybe_arguments_iter(arguments_string):
179+
if maybe_arg not in valid_args:
140180
location = process.file_lineno_message(
141181
page_src_path, docs_dir, directive_lineno(),
142182
)
143183
logger.warning(
144-
f"Invalid argument '{arg_value}' in"
184+
f"Invalid argument '{maybe_arg}' in"
145185
f" '{directive}' directive at {location}. Ignoring...",
146186
)
147187

@@ -156,9 +196,9 @@ def parse_filename_argument(
156196
if raw_filename is None:
157197
filename = None
158198
else:
159-
filename = raw_filename.replace("\\'", "'")
199+
filename = raw_filename.replace(r"\'", "'")
160200
else:
161-
filename = raw_filename.replace('\\"', '"')
201+
filename = raw_filename.replace(r'\"', '"')
162202
return filename, raw_filename
163203

164204

@@ -168,9 +208,9 @@ def parse_string_argument(match: re.Match[str]) -> str | None:
168208
if value is None:
169209
value = match[3]
170210
if value is not None:
171-
value = value.replace("\\'", "'")
211+
value = value.replace(r"\'", "'")
172212
else:
173-
value = value.replace('\\"', '"')
213+
value = value.replace(r'\"', '"')
174214
return value
175215

176216

@@ -182,12 +222,24 @@ def create_include_tag(
182222
Replaces the ' include' directive name and the default '{%' and '%}'
183223
delimiters from INCLUDE_TAG_RE by the effective tag and delimiters.
184224
"""
185-
return re.compile(
186-
INCLUDE_TAG_RE.replace(' include', f' {tag}', 1).replace(
187-
'$OPENING_TAG', re.escape(opening_tag), 1,
188-
).replace('$CLOSING_TAG', re.escape(closing_tag), 1),
189-
flags=re.VERBOSE | re.DOTALL,
190-
)
225+
pattern = INCLUDE_TAG_RE
226+
if tag != 'include':
227+
pattern = pattern.replace(
228+
' include',
229+
(
230+
' include-markdown' if tag == 'include-markdown'
231+
else f' {re.escape(tag)}'
232+
),
233+
1,
234+
)
235+
236+
if opening_tag != '{%':
237+
pattern = pattern.replace(r'\{%', re.escape(opening_tag), 1)
238+
239+
if closing_tag != '%}':
240+
pattern = pattern.replace(r'%\}', re.escape(closing_tag), 1)
241+
242+
return re.compile(pattern, flags=re.VERBOSE | re.DOTALL)
191243

192244

193245
def parse_bool_options(

src/mkdocs_include_markdown_plugin/process.py

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,10 @@ def transform_line_by_line_skipping_codeblocks(
249249
markdown: str,
250250
func: Callable[[str], str],
251251
) -> str:
252-
"""Apply a transformation line by line in a Markdown text using a function.
252+
"""Apply a transformation line by line in a Markdown text using a function.
253253
254-
Skip fenced codeblock lines, where the transformation never is applied.
254+
Skip fenced codeblock lines and empty lines, where the transformation
255+
is never applied.
255256
256257
Indented codeblocks are not taken into account because in the practice
257258
this function is only used for transformations of heading prefixes. See
@@ -263,13 +264,15 @@ def transform_line_by_line_skipping_codeblocks(
263264

264265
lines = []
265266
for line in io.StringIO(markdown):
267+
lstripped_line = line.lstrip()
266268
if not _current_fcodeblock_delimiter:
267-
lstripped_line = line.lstrip()
268-
if lstripped_line.startswith(('```', '~~~')):
269-
_current_fcodeblock_delimiter = lstripped_line[:3]
269+
if lstripped_line.startswith('```'):
270+
_current_fcodeblock_delimiter = '```'
271+
elif lstripped_line.startswith('~~~'):
272+
_current_fcodeblock_delimiter = '~~~'
270273
else:
271274
line = func(line) # noqa: PLW2901
272-
elif line.lstrip().startswith(_current_fcodeblock_delimiter):
275+
elif lstripped_line.startswith(_current_fcodeblock_delimiter):
273276
_current_fcodeblock_delimiter = ''
274277
lines.append(line)
275278

@@ -287,39 +290,27 @@ def rewrite_relative_urls(
287290
``source_path`` will still work when inserted into a file at
288291
``destination_path``.
289292
"""
290-
from urllib.parse import urlparse, urlunparse
291-
292293
def rewrite_url(url: str) -> str:
293-
if is_url(url):
294-
return url
295-
296-
scheme, netloc, path, params, query, fragment = urlparse(url)
297-
298-
# absolute or mail
299-
if path.startswith('/') or scheme == 'mailto':
294+
if is_url(url) or is_absolute_path(url):
300295
return url
301296

302297
new_path = os.path.relpath(
303-
os.path.join(os.path.dirname(source_path), path),
298+
os.path.join(os.path.dirname(source_path), url),
304299
os.path.dirname(destination_path),
305300
)
306301

307302
# ensure forward slashes are used, on Windows
308303
new_path = new_path.replace('\\', '/').replace('//', '/')
309304

310305
try:
311-
if path[-1] == '/':
306+
if url[-1] == '/':
312307
# the above operation removes a trailing slash,
313308
# so add it back if it was present in the input
314309
new_path += '/'
315310
except IndexError: # pragma: no cover
316311
pass
317312

318-
# ensure that links to the same file are not rewritten
319-
if new_path == '.':
320-
new_path = ''
321-
322-
return urlunparse((scheme, netloc, new_path, params, query, fragment))
313+
return new_path
323314

324315
def found_href(m: re.Match[str], url_group_index: int = -1) -> str:
325316
match_start, match_end = m.span(0)
@@ -528,27 +519,68 @@ def filter_paths(
528519
return response
529520

530521

522+
def _is_valid_url_scheme_char(c: str) -> bool:
523+
"""Determine if a character is a valid URL scheme character.
524+
525+
Valid characters are:
526+
527+
```
528+
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-.
529+
```
530+
"""
531+
codepoint = ord(c)
532+
A = 65
533+
Z = 90
534+
a = 97
535+
z = 122
536+
zero = 48
537+
nine = 57
538+
dot = 46
539+
plus = 43
540+
minus = 45
541+
return (
542+
A <= codepoint <= Z
543+
or a <= codepoint <= z
544+
or zero <= codepoint <= nine
545+
or codepoint in (plus, minus, dot)
546+
)
547+
548+
531549
def is_url(string: str) -> bool:
532-
"""Determine if a string is an URL."""
533-
if ':' not in string: # fast path
550+
"""Determine if a string is an URL.
551+
552+
The implementation has been adapted from `urllib.parse.urlparse`.
553+
"""
554+
i = string.find(':')
555+
if i <= 1: # noqa: PLR2004 -> exclude C: or D: on Windows
534556
return False
535-
from urllib.parse import urlparse
536557

537558
try:
538-
result = urlparse(string)
539-
return all([result.scheme, result.netloc])
540-
except ValueError: # pragma: no cover
559+
return all(_is_valid_url_scheme_char(string[j]) for j in range(i))
560+
except (IndexError, ValueError): # pragma: no cover
541561
return False
542562

543563

544564
def is_relative_path(string: str) -> bool:
545565
"""Check if a string looks like a relative path."""
546-
return string.startswith(('./', '../'))
566+
try:
567+
return (
568+
string[0] == '.'
569+
and (
570+
string[1] == '/'
571+
or (string[1] == '.' and string[2] == '/')
572+
)
573+
)
574+
except IndexError: # pragma: no cover
575+
return False
547576

548577

549578
def is_absolute_path(string: str) -> bool:
550579
"""Check if a string looks like an absolute path."""
551-
return string.startswith((os.sep, '/'))
580+
try:
581+
return string[0] == '/' or string[0] == os.sep
582+
except IndexError: # pragma: no cover
583+
return False
552584

553585

554586
def read_file(file_path: str, encoding: str) -> str:
@@ -581,14 +613,12 @@ def read_url(
581613
def safe_os_path_relpath(path: str, start: str) -> str:
582614
"""Return the relative path of a file from a start directory.
583615
584-
Safe version of `os.path.relpath` that catches `ValueError` exceptions
585-
on Windows and returns the original path in case of error.
616+
Safe version of `os.path.relpath` that catches possible `ValueError`
617+
exceptions and returns the original path in case of error.
586618
On Windows, `ValueError` is raised when `path` and `start` are on
587619
different drives.
588620
"""
589-
if os.name != 'nt': # pragma: nt no cover
590-
return os.path.relpath(path, start)
591-
try: # pragma: nt cover
621+
try:
592622
return os.path.relpath(path, start)
593623
except ValueError: # pragma: no cover
594624
return path

tests/test_unit/test_arguments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def test_invalid_argument_name(directive, page, tmp_path, plugin, caplog):
284284

285285
assert len(caplog.records) == 1
286286
assert caplog.records[0].msg == (
287-
f"Invalid argument 'invalid-argument=true' in '{directive}'"
287+
f"Invalid argument 'invalid-argument' in '{directive}'"
288288
" directive at includer.md:1. Ignoring..."
289289
)
290290

0 commit comments

Comments
 (0)