Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split newlines only at Python universal newlines (LF, CRLF, CR) #87

Merged
merged 3 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Removed

Fixed
-----
- Only split input files at Python's universal newlines (LF, CRLF, CR), not on more
exotic newline sequences. This fixes some edge cases in Darker.


2.1.0_ - 2024-11-19
Expand Down
6 changes: 6 additions & 0 deletions src/darkgraylib/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ def test_textdocument_encoded_string(encoding, newline, expect):
dict(
doc=TextDocument(string="zéro\r\nun\r\n", newline="\r\n"), expect=("zéro", "un")
),
dict(
doc=TextDocument(
string="# coding: iso-8859-5\n# б\x85б\x86\n", encoding="iso-8859-5"
),
expect=("# coding: iso-8859-5", "# б\x85б\x86"),
),
)
def test_textdocument_lines(doc, expect):
"""TextDocument.lines is correct after parsing a string with different newlines"""
Expand Down
18 changes: 17 additions & 1 deletion src/darkgraylib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@ def detect_newline(string: str) -> str:
return "\n"


def normalize_newlines(string: str) -> str:
    """Normalize Python universal newlines (LF, CRLF, CR) in a string to LF.

    Only ``"\\n"``, ``"\\r\\n"`` and ``"\\r"`` are treated as newlines. Other
    characters which ``str.splitlines`` would split on (e.g. ``"\\x85"``,
    form feed or ``"\\u2028"``) are left untouched.

    :param string: The text to normalize
    :return: The text with every universal newline replaced by ``"\\n"``

    """
    # ``final=True`` flushes the decoder. Without it, a lone trailing "\r" is
    # held back as a "pending CR" (in case an "\n" follows in a later chunk)
    # and would silently be dropped from the result.
    return io.IncrementalNewlineDecoder(None, True).decode(string, final=True)


def splitlines(string: str) -> list[str]:
    """Split a string into lines at universal newlines (LF, CRLF, CR).

    A single trailing newline is treated as the terminator of the last line
    and doesn't produce an extra empty line. Blank lines at the end of the
    string are preserved, matching what ``str.splitlines`` does for them.

    :param string: The text to split
    :return: The lines of the text without their newline sequences. An empty
             string yields an empty list.

    """
    if not string:
        return []
    normalized = normalize_newlines(string)
    # Drop only the final line terminator. Stripping all trailing newlines
    # would lose blank lines at the end of the document.
    if normalized.endswith("\n"):
        normalized = normalized[:-1]
    return normalized.split("\n")


class TextDocument:
"""Store & handle a multi-line text document, either as a string or list of lines"""

Expand Down Expand Up @@ -65,7 +81,7 @@ def encoded_string(self) -> bytes:
def lines(self) -> TextLines:
"""Return the document as a list of lines converting and caching if necessary"""
if self._lines is None:
self._lines = tuple((self._string or "").splitlines())
self._lines = tuple(splitlines(self._string or ""))
return self._lines

@property
Expand Down
Loading