Skip to content

Commit 7faed13

Browse files
Merge pull request #70 from RonnyPfannschmidt/comments
Add IniConfig.parse() with inline comment stripping and Unicode whitespace handling
2 parents 57b7ed9 + 58c0869 commit 7faed13

File tree

4 files changed

+291
-29
lines changed

4 files changed

+291
-29
lines changed

CHANGELOG

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
2.3.0
2+
=====
3+
4+
* add IniConfig.parse() classmethod with strip_inline_comments parameter (fixes #55)
5+
- by default (strip_inline_comments=True), inline comments are properly stripped from values
6+
- set strip_inline_comments=False to preserve old behavior if needed
7+
* IniConfig() constructor maintains backward compatibility (does not strip inline comments)
8+
* users should migrate to IniConfig.parse() for correct comment handling
9+
* add strip_section_whitespace parameter to IniConfig.parse() (regarding #4)
10+
- opt-in parameter to strip Unicode whitespace from section names
11+
- when True, strips Unicode whitespace (U+00A0, U+2000, U+3000, etc.) from section names
12+
- when False (default), preserves existing behavior for backward compatibility
13+
* clarify Unicode whitespace handling (regarding #4)
14+
- since iniconfig 2.0.0 (Python 3 only), all strings are Unicode by default
15+
- Python 3's str.strip() has handled Unicode whitespace since Python 3.0 (2008)
16+
- iniconfig automatically benefits from this in all supported versions (Python >= 3.10)
17+
- key names and values have Unicode whitespace properly stripped using Python's built-in methods
18+
119
2.2.0
220
=====
321

src/iniconfig/__init__.py

Lines changed: 75 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -96,33 +96,86 @@ def __init__(
9696
path: str | os.PathLike[str],
9797
data: str | None = None,
9898
encoding: str = "utf-8",
99+
*,
100+
_sections: Mapping[str, Mapping[str, str]] | None = None,
101+
_sources: Mapping[tuple[str, str | None], int] | None = None,
99102
) -> None:
100103
self.path = os.fspath(path)
104+
105+
# Determine sections and sources
106+
if _sections is not None and _sources is not None:
107+
# Use provided pre-parsed data (called from parse())
108+
sections_data = _sections
109+
sources = _sources
110+
else:
111+
# Parse the data (backward compatible path)
112+
if data is None:
113+
with open(self.path, encoding=encoding) as fp:
114+
data = fp.read()
115+
116+
# Use old behavior (no stripping) for backward compatibility
117+
sections_data, sources = _parse.parse_ini_data(
118+
self.path, data, strip_inline_comments=False
119+
)
120+
121+
# Assign once to Final attributes
122+
self._sources = sources
123+
self.sections = sections_data
124+
125+
@classmethod
126+
def parse(
127+
cls,
128+
path: str | os.PathLike[str],
129+
data: str | None = None,
130+
encoding: str = "utf-8",
131+
*,
132+
strip_inline_comments: bool = True,
133+
strip_section_whitespace: bool = False,
134+
) -> "IniConfig":
135+
"""Parse an INI file.
136+
137+
Args:
138+
path: Path to the INI file (used for error messages)
139+
data: Optional INI content as string. If None, reads from path.
140+
encoding: Encoding to use when reading the file (default: utf-8)
141+
strip_inline_comments: Whether to strip inline comments from values
142+
(default: True). When True, everything from the first # or ; onward is
143+
removed from values and continuation lines, matching the behavior for section comments.
144+
strip_section_whitespace: Whether to strip whitespace from section names
145+
(default: False). When True, strips Unicode whitespace from section names,
146+
addressing issue #4. When False, preserves existing behavior for backward compatibility. Key names are always stripped, regardless of this flag.
147+
148+
Returns:
149+
IniConfig instance with parsed configuration
150+
151+
Example:
152+
# With comment stripping (default):
153+
config = IniConfig.parse("setup.cfg")
154+
# value = "foo" instead of "foo # comment"
155+
156+
# Without comment stripping (old behavior):
157+
config = IniConfig.parse("setup.cfg", strip_inline_comments=False)
158+
# value = "foo # comment"
159+
160+
# With section name stripping (opt-in for issue #4):
161+
config = IniConfig.parse("setup.cfg", strip_section_whitespace=True)
162+
# section names have Unicode whitespace stripped (key names are always stripped)
163+
"""
164+
fspath = os.fspath(path)
165+
101166
if data is None:
102-
with open(self.path, encoding=encoding) as fp:
167+
with open(fspath, encoding=encoding) as fp:
103168
data = fp.read()
104169

105-
tokens = _parse.parse_lines(self.path, data.splitlines(True))
106-
107-
self._sources = {}
108-
sections_data: dict[str, dict[str, str]]
109-
self.sections = sections_data = {}
110-
111-
for lineno, section, name, value in tokens:
112-
if section is None:
113-
raise ParseError(self.path, lineno, "no section header defined")
114-
self._sources[section, name] = lineno
115-
if name is None:
116-
if section in self.sections:
117-
raise ParseError(
118-
self.path, lineno, f"duplicate section {section!r}"
119-
)
120-
sections_data[section] = {}
121-
else:
122-
if name in self.sections[section]:
123-
raise ParseError(self.path, lineno, f"duplicate name {name!r}")
124-
assert value is not None
125-
sections_data[section][name] = value
170+
sections_data, sources = _parse.parse_ini_data(
171+
fspath,
172+
data,
173+
strip_inline_comments=strip_inline_comments,
174+
strip_section_whitespace=strip_section_whitespace,
175+
)
176+
177+
# Call constructor with pre-parsed sections and sources
178+
return cls(path=fspath, _sections=sections_data, _sources=sources)
126179

127180
def lineof(self, section: str, name: str | None = None) -> int | None:
128181
lineno = self._sources.get((section, name))

src/iniconfig/_parse.py

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections.abc import Mapping
12
from typing import NamedTuple
23

34
from .exceptions import ParseError
@@ -12,11 +13,67 @@ class ParsedLine(NamedTuple):
1213
value: str | None
1314

1415

15-
def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]:
16+
def parse_ini_data(
17+
path: str,
18+
data: str,
19+
*,
20+
strip_inline_comments: bool,
21+
strip_section_whitespace: bool = False,
22+
) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]:
23+
"""Parse INI data and return sections and sources mappings.
24+
25+
Args:
26+
path: Path for error messages
27+
data: INI content as string
28+
strip_inline_comments: Whether to strip inline comments from values
29+
strip_section_whitespace: Whether to strip whitespace from section names
30+
(default: False). When True, addresses issue #4 by stripping Unicode whitespace.
31+
32+
Returns:
33+
Tuple of (sections_data, sources) where:
34+
- sections_data: mapping of section -> {name -> value}
35+
- sources: mapping of (section, name) -> line number
36+
"""
37+
tokens = parse_lines(
38+
path,
39+
data.splitlines(True),
40+
strip_inline_comments=strip_inline_comments,
41+
strip_section_whitespace=strip_section_whitespace,
42+
)
43+
44+
sources: dict[tuple[str, str | None], int] = {}
45+
sections_data: dict[str, dict[str, str]] = {}
46+
47+
for lineno, section, name, value in tokens:
48+
if section is None:
49+
raise ParseError(path, lineno, "no section header defined")
50+
sources[section, name] = lineno
51+
if name is None:
52+
if section in sections_data:
53+
raise ParseError(path, lineno, f"duplicate section {section!r}")
54+
sections_data[section] = {}
55+
else:
56+
if name in sections_data[section]:
57+
raise ParseError(path, lineno, f"duplicate name {name!r}")
58+
assert value is not None
59+
sections_data[section][name] = value
60+
61+
return sections_data, sources
62+
63+
64+
def parse_lines(
65+
path: str,
66+
line_iter: list[str],
67+
*,
68+
strip_inline_comments: bool = False,
69+
strip_section_whitespace: bool = False,
70+
) -> list[ParsedLine]:
1671
result: list[ParsedLine] = []
1772
section = None
1873
for lineno, line in enumerate(line_iter):
19-
name, data = _parseline(path, line, lineno)
74+
name, data = _parseline(
75+
path, line, lineno, strip_inline_comments, strip_section_whitespace
76+
)
2077
# new value
2178
if name is not None and data is not None:
2279
result.append(ParsedLine(lineno, section, name, data))
@@ -42,7 +99,13 @@ def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]:
4299
return result
43100

44101

45-
def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | None]:
102+
def _parseline(
103+
path: str,
104+
line: str,
105+
lineno: int,
106+
strip_inline_comments: bool,
107+
strip_section_whitespace: bool,
108+
) -> tuple[str | None, str | None]:
46109
# blank lines
47110
if iscommentline(line):
48111
line = ""
@@ -56,7 +119,11 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non
56119
for c in COMMENTCHARS:
57120
line = line.split(c)[0].rstrip()
58121
if line[-1] == "]":
59-
return line[1:-1], None
122+
section_name = line[1:-1]
123+
# Optionally strip whitespace from section name (issue #4)
124+
if strip_section_whitespace:
125+
section_name = section_name.strip()
126+
return section_name, None
60127
return None, realline.strip()
61128
# value
62129
elif not line[0].isspace():
@@ -69,10 +136,26 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non
69136
name, value = line.split(":", 1)
70137
except ValueError:
71138
raise ParseError(path, lineno, f"unexpected line: {line!r}") from None
72-
return name.strip(), value.strip()
139+
140+
# Strip key name (always, for backward compatibility; str.strip() already handles Unicode whitespace)
141+
key_name = name.strip()
142+
143+
# Strip value
144+
value = value.strip()
145+
# Strip inline comments from values if requested (issue #55)
146+
if strip_inline_comments:
147+
for c in COMMENTCHARS:
148+
value = value.split(c)[0].rstrip()
149+
150+
return key_name, value
73151
# continuation
74152
else:
75-
return None, line.strip()
153+
line = line.strip()
154+
# Strip inline comments from continuations if requested (issue #55)
155+
if strip_inline_comments:
156+
for c in COMMENTCHARS:
157+
line = line.split(c)[0].rstrip()
158+
return None, line
76159

77160

78161
def iscommentline(line: str) -> bool:

testing/test_iniconfig.py

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def test_iniconfig_from_file(tmp_path: Path) -> None:
125125
config = IniConfig(str(path), "[diff]")
126126
assert list(config.sections) == ["diff"]
127127
with pytest.raises(TypeError):
128-
IniConfig(data=path.read_text()) # type: ignore
128+
IniConfig(data=path.read_text()) # type: ignore[call-arg]
129129

130130

131131
def test_iniconfig_section_first() -> None:
@@ -304,3 +304,111 @@ def test_api_import() -> None:
304304
)
305305
def test_iscommentline_true(line: str) -> None:
306306
assert iscommentline(line)
307+
308+
309+
def test_parse_strips_inline_comments() -> None:
310+
"""Test that IniConfig.parse() strips inline comments from values by default."""
311+
config = IniConfig.parse(
312+
"test.ini",
313+
data=dedent(
314+
"""
315+
[section1]
316+
name1 = value1 # this is a comment
317+
name2 = value2 ; this is also a comment
318+
name3 = value3# no space before comment
319+
list = a, b, c # some items
320+
"""
321+
),
322+
)
323+
assert config["section1"]["name1"] == "value1"
324+
assert config["section1"]["name2"] == "value2"
325+
assert config["section1"]["name3"] == "value3"
326+
assert config["section1"]["list"] == "a, b, c"
327+
328+
329+
def test_parse_strips_inline_comments_from_continuations() -> None:
330+
"""Test that inline comments are stripped from continuation lines."""
331+
config = IniConfig.parse(
332+
"test.ini",
333+
data=dedent(
334+
"""
335+
[section]
336+
names =
337+
Alice # first person
338+
Bob ; second person
339+
Charlie
340+
"""
341+
),
342+
)
343+
assert config["section"]["names"] == "Alice\nBob\nCharlie"
344+
345+
346+
def test_parse_preserves_inline_comments_when_disabled() -> None:
347+
"""Test that IniConfig.parse(strip_inline_comments=False) preserves comments."""
348+
config = IniConfig.parse(
349+
"test.ini",
350+
data=dedent(
351+
"""
352+
[section1]
353+
name1 = value1 # this is a comment
354+
name2 = value2 ; this is also a comment
355+
list = a, b, c # some items
356+
"""
357+
),
358+
strip_inline_comments=False,
359+
)
360+
assert config["section1"]["name1"] == "value1 # this is a comment"
361+
assert config["section1"]["name2"] == "value2 ; this is also a comment"
362+
assert config["section1"]["list"] == "a, b, c # some items"
363+
364+
365+
def test_constructor_preserves_inline_comments_for_backward_compatibility() -> None:
366+
"""Test that IniConfig() constructor preserves old behavior (no stripping)."""
367+
config = IniConfig(
368+
"test.ini",
369+
data=dedent(
370+
"""
371+
[section1]
372+
name1 = value1 # this is a comment
373+
name2 = value2 ; this is also a comment
374+
"""
375+
),
376+
)
377+
assert config["section1"]["name1"] == "value1 # this is a comment"
378+
assert config["section1"]["name2"] == "value2 ; this is also a comment"
379+
380+
381+
def test_unicode_whitespace_stripped() -> None:
382+
"""Test that Unicode whitespace is stripped from values (issue #4)."""
383+
config = IniConfig(
384+
"test.ini",
385+
data="[section]\n"
386+
+ "name1 = \u00a0value1\u00a0\n" # NO-BREAK SPACE
387+
+ "name2 = \u2000value2\u2000\n" # EN QUAD
388+
+ "name3 = \u3000value3\u3000\n", # IDEOGRAPHIC SPACE
389+
)
390+
assert config["section"]["name1"] == "value1"
391+
assert config["section"]["name2"] == "value2"
392+
assert config["section"]["name3"] == "value3"
393+
394+
395+
def test_unicode_whitespace_in_section_names_with_opt_in() -> None:
396+
"""Test that Unicode whitespace can be stripped from section names with opt-in (issue #4)."""
397+
config = IniConfig.parse(
398+
"test.ini",
399+
data="[section\u00a0]\n" # NO-BREAK SPACE at end
400+
+ "key = value\n",
401+
strip_section_whitespace=True,
402+
)
403+
assert "section" in config
404+
assert config["section"]["key"] == "value"
405+
406+
407+
def test_unicode_whitespace_in_key_names() -> None:
408+
"""Test that Unicode whitespace is stripped from key names (issue #4)."""
409+
config = IniConfig(
410+
"test.ini",
411+
data="[section]\n" + "key\u00a0 = value\n", # NO-BREAK SPACE after key
412+
)
413+
assert "key" in config["section"]
414+
assert config["section"]["key"] == "value"

0 commit comments

Comments
 (0)