Skip to content

Commit

Permalink
Merge pull request #87 from akaihola/newline-splitting
Browse files Browse the repository at this point in the history
Split newlines only at Python universal newlines (LF, CRLF, CR)
  • Loading branch information
akaihola authored Jan 7, 2025
2 parents 5d3dbe4 + 3035dcc commit b859b19
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Removed

Fixed
-----
- Only split input files at Python's universal newlines (LF, CRLF, CR), not on more
exotic newline sequences. This fixes some edge cases in Darker.


2.1.0_ - 2024-11-19
Expand Down
6 changes: 6 additions & 0 deletions src/darkgraylib/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ def test_textdocument_encoded_string(encoding, newline, expect):
dict(
doc=TextDocument(string="zéro\r\nun\r\n", newline="\r\n"), expect=("zéro", "un")
),
dict(
doc=TextDocument(
string="# coding: iso-8859-5\n# б\x85б\x86\n", encoding="iso-8859-5"
),
expect=("# coding: iso-8859-5", "# б\x85б\x86"),
),
)
def test_textdocument_lines(doc, expect):
"""TextDocument.lines is correct after parsing a string with different newlines"""
Expand Down
18 changes: 17 additions & 1 deletion src/darkgraylib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@ def detect_newline(string: str) -> str:
return "\n"


def normalize_newlines(string: str) -> str:
    """Normalize universal newlines (CRLF and lone CR) in a string to LF.

    Only the newline sequences recognized by Python's universal newlines mode
    are translated. Other characters which ``str.splitlines()`` treats as line
    boundaries (e.g. ``\\x85`` or ``\\u2028``) are left untouched.

    :param string: The text whose newlines should be normalized
    :return: The text with every ``\\r\\n`` and ``\\r`` replaced by ``\\n``

    """
    decoder = io.IncrementalNewlineDecoder(None, True)
    # `final=True` is required: without it the decoder holds back a trailing
    # "\r" (it could be the first half of a CRLF pair split between chunks),
    # and that character would be silently dropped from the result.
    return decoder.decode(string, final=True)


def splitlines(string: str) -> list[str]:
    """Split a string into lines at universal newlines (LF, CRLF, CR) only.

    Unlike ``str.splitlines()``, other line boundary characters such as
    ``\\x85``, ``\\x0c`` or ``\\u2028`` do not split a line. As with
    ``str.splitlines()``, a single newline terminating the last line doesn't
    produce an extra empty item, but trailing blank lines are preserved.

    :param string: The text to split
    :return: The lines of the text without their newline terminators. An empty
             string yields an empty list.

    """
    if not string:
        return []
    # Translate CRLF and lone CR to LF. `final=True` makes the decoder emit a
    # trailing lone "\r" as a newline instead of buffering it as a potential
    # first half of a CRLF pair.
    text = io.IncrementalNewlineDecoder(None, True).decode(string, final=True)
    # Drop only the terminator of the last line. Stripping all trailing
    # newlines would lose blank lines at the end of the document.
    if text.endswith("\n"):
        text = text[:-1]
    return text.split("\n")


class TextDocument:
"""Store & handle a multi-line text document, either as a string or list of lines"""

Expand Down Expand Up @@ -65,7 +81,7 @@ def encoded_string(self) -> bytes:
def lines(self) -> TextLines:
"""Return the document as a list of lines converting and caching if necessary"""
if self._lines is None:
self._lines = tuple((self._string or "").splitlines())
self._lines = tuple(splitlines(self._string or ""))
return self._lines

@property
Expand Down

0 comments on commit b859b19

Please sign in to comment.