Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(term): ansi: account for some wrap edge cases #59

Merged
merged 9 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 61 additions & 41 deletions exp/term/ansi/wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import (
"github.com/rivo/uniseg"
)

// nbsp is the code point of the Unicode non-breaking space (U+00A0). The
// wrapping code compares runes against it so that a non-breaking space is
// not treated as a breakable whitespace character.
const nbsp = 0xA0

// Hardwrap wraps a string or a block of text to a given line length, breaking
// word boundaries. This will preserve ANSI escape codes and will account for
// wide-characters in the string.
Expand Down Expand Up @@ -105,14 +108,13 @@ func Hardwrap(s string, limit int, preserveSpace bool) string {
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func Wordwrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}

// Add a hyphen to the breakpoints
breakpoints += "-"

var (
cluster []byte
buf bytes.Buffer
Expand All @@ -135,6 +137,7 @@ func Wordwrap(s string, limit int, breakpoints string) string {
if word.Len() == 0 {
return
}

addSpace()
curWidth += wordLen
buf.Write(word.Bytes())
Expand All @@ -160,7 +163,7 @@ func Wordwrap(s string, limit int, breakpoints string) string {
i += len(cluster)

r, _ := utf8.DecodeRune(cluster)
if r != utf8.RuneError && unicode.IsSpace(r) {
if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
addWord()
space.WriteRune(r)
} else if bytes.ContainsAny(cluster, breakpoints) {
Expand Down Expand Up @@ -199,6 +202,8 @@ func Wordwrap(s string, limit int, breakpoints string) string {
case unicode.IsSpace(r):
addWord()
space.WriteByte(b[i])
case r == '-':

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would there be value in having a small helper function IsDash, to be consistent with how it is done for IsSpace?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps, if we add support for other Unicode dash characters 🤔

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something to consider, certainly nothing to stop you merging once you do decide.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking more about this, there are many variants of dashes defined in Unicode. People who wish to use custom breakpoints can do so using the breakpoints parameter.

fallthrough
case runeContainsAny(r, breakpoints):
addSpace()
addWord()
Expand Down Expand Up @@ -234,21 +239,20 @@ func Wordwrap(s string, limit int, breakpoints string) string {
// account for wide-characters in the string. The breakpoints string is a list
// of characters that are considered breakpoints for word wrapping. A hyphen
// (-) is always considered a breakpoint.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func Wrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}

// Add a hyphen to the breakpoints
breakpoints += "-"

var (
cluster []byte
buf bytes.Buffer
word bytes.Buffer
space bytes.Buffer
curWidth int
wordLen int
curWidth int // written width of the line
wordLen int // word buffer len without ANSI escape codes
gstate = -1
pstate = parser.GroundState // initial state
b = []byte(s)
Expand All @@ -264,6 +268,7 @@ func Wrap(s string, limit int, breakpoints string) string {
if word.Len() == 0 {
return
}

addSpace()
curWidth += wordLen
buf.Write(word.Bytes())
Expand All @@ -289,44 +294,47 @@ func Wrap(s string, limit int, breakpoints string) string {
i += len(cluster)

r, _ := utf8.DecodeRune(cluster)
if r != utf8.RuneError && unicode.IsSpace(r) {
switch {
case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
addWord()
space.WriteRune(r)
} else if bytes.ContainsAny(cluster, breakpoints) {
case bytes.ContainsAny(cluster, breakpoints):
addSpace()
addWord()
buf.Write(cluster)
curWidth++
} else {
if curWidth+wordLen+width > limit {
word.Write(cluster)
wordLen += width
} else {
addWord()
buf.Write(cluster)
curWidth += width
}
default:
if wordLen+width > limit {
// Hardwrap the word if it's too long
addWord()
addNewline()
}

word.Write(cluster)
wordLen += width
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {

if curWidth+wordLen+space.Len() > limit {
addNewline()
} else if curWidth+wordLen >= limit {
addWord()
if i < len(b)-1 {
addNewline()
}
}
}

pstate = parser.GroundState
continue
}

fallthrough
case parser.ExecuteAction:
r := rune(b[i])
switch {
switch r := rune(b[i]); {
case r == '\n':
if wordLen == 0 {
if curWidth+space.Len() > limit {
curWidth = 0
} else {
// preserve whitespaces
buf.Write(space.Bytes())
}
space.Reset()
Expand All @@ -336,27 +344,32 @@ func Wrap(s string, limit int, breakpoints string) string {
addNewline()
case unicode.IsSpace(r):
addWord()
space.WriteByte(b[i])
space.WriteRune(r)
case r == '-':
fallthrough
case runeContainsAny(r, breakpoints):
addSpace()
addWord()
buf.WriteByte(b[i])
curWidth++
default:
if wordLen+1 > limit {
if curWidth+wordLen >= limit {
// We can't fit the breakpoint in the current line, treat
// it as part of the word.
word.WriteRune(r)
wordLen++
} else {
addWord()
addNewline()
buf.WriteRune(r)
curWidth++
}
word.WriteByte(b[i])
default:
word.WriteRune(r)
wordLen++
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {
addNewline()
} else if curWidth+wordLen >= limit {

if wordLen == limit {
// Hardwrap the word if it's too long
addWord()
if i < len(b)-1 {
addNewline()
}
}

if curWidth+wordLen+space.Len() > limit {
addNewline()
}
}

Expand All @@ -371,7 +384,14 @@ func Wrap(s string, limit int, breakpoints string) string {
i++
}

addWord()
if word.Len() != 0 {
// Preserve ANSI wrapped spaces at the end of string
if curWidth+space.Len() > limit {
buf.WriteByte('\n')
}
addSpace()
}
buf.Write(word.Bytes())

return buf.String()
}
Expand Down
65 changes: 58 additions & 7 deletions exp/term/ansi/wrap_test.go
aymanbagabas marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ var cases = []struct {
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
}

func TestWrap(t *testing.T) {
func TestHardwrap(t *testing.T) {
for i, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
if got := ansi.Hardwrap(tt.input, tt.limit, tt.preserveSpace); got != tt.expected {
Expand Down Expand Up @@ -92,7 +92,7 @@ func TestWrapWordwrap(t *testing.T) {
}
}

var smartWrapCases = []struct {
var wrapCases = []struct {
name string
input string
expected string
Expand Down Expand Up @@ -128,6 +128,12 @@ var smartWrapCases = []struct {
expected: "\x1B[38;2;249;38;114ma really\nlong\nstring\x1B[0m",
width: 10,
},
{
name: "long style nbsp",
input: "\x1B[38;2;249;38;114ma really\u00a0long string\x1B[0m",
expected: "\x1b[38;2;249;38;114ma\nreally\u00a0lon\ng string\x1b[0m",
width: 10,
},
{
name: "longer",
input: "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog.",
Expand All @@ -143,29 +149,74 @@ var smartWrapCases = []struct {
{
name: "long input",
input: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on-the-rocks.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on\n-the-rocks.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-\non-the-rocks.",
width: 76,
},
{
name: "long input2",
input: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operating-system.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operat\ning-system.",
aymanbagabas marked this conversation as resolved.
Show resolved Hide resolved
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-\noperating-system.",
width: 76,
},
{
name: "hyphen breakpoint",
input: "a-good-offensive-cheat-code",
expected: "a-good-\noffensive-\ncheat-code",
width: 10,
},
{
name: "exact",
input: "\x1b[91mfoo\x1b[0",
expected: "\x1b[91mfoo\x1b[0",
width: 3,
},
{
// XXX: Should we preserve spaces on text wrapping?
name: "extra space",
input: "foo ",
expected: "foo",
width: 3,
},
{
name: "extra space style",
input: "\x1b[mfoo \x1b[m",
expected: "\x1b[mfoo\n \x1b[m",
width: 3,
},
{
name: "paragraph with styles",
input: "Lorem ipsum dolor \x1b[1msit\x1b[m amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. \x1b[31mUt enim\x1b[m ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea \x1b[38;5;200mcommodo consequat\x1b[m. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. \x1b[1;2;33mExcepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\x1b[m",
expected: "Lorem ipsum dolor \x1b[1msit\x1b[m amet,\nconsectetur adipiscing elit,\nsed do eiusmod tempor\nincididunt ut labore et dolore\nmagna aliqua. \x1b[31mUt enim\x1b[m ad minim\nveniam, quis nostrud\nexercitation ullamco laboris\nnisi ut aliquip ex ea \x1b[38;5;200mcommodo\nconsequat\x1b[m. Duis aute irure\ndolor in reprehenderit in\nvoluptate velit esse cillum\ndolore eu fugiat nulla\npariatur. \x1b[1;2;33mExcepteur sint\noccaecat cupidatat non\nproident, sunt in culpa qui\nofficia deserunt mollit anim\nid est laborum.\x1b[m",
width: 30,
},
{"hyphen break", "foo-bar", "foo-\nbar", 5},
{"double space", "f bar foobaz", "f bar\nfoobaz", 6},
{"passthrough", "foobar\n ", "foobar\n ", 0},
{"pass", "foo", "foo", 3},
{"toolong", "foobarfoo", "foob\narfo\no", 4},
{"white space", "foo bar foo", "foo\nbar\nfoo", 4},
{"broken_at_spaces", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
{"hyphen", "foob-foobar", "foob\n-foo\nbar", 4},
{"wide_emoji_breakpoint", "foo🫧 foobar", "foo\n🫧\nfoob\nar", 4},
{"space_breakpoint", "foo --bar", "foo --bar", 9},
{"simple", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
{"limit", "foo bar", "foo\nbar", 5},
{"remove white spaces", "foo \nb ar ", "foo\nb\nar", 4},
{"white space trail width", "foo\nb\t a\n bar", "foo\nb\t a\n bar", 4},
{"explicit_line_break", "foo bar foo\n", "foo\nbar\nfoo\n", 4},
{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", "\nfoo\nbar\n\n\nfoo\n", 4},
{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo \nbar ", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar", 6},
{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7},
{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", "\x1b[38;2;249;38;114m(\x1b[0m\x1b[38;2;248;248;242mjust\nanother\ntest\x1b[38;2;249;38;114m)\x1b[0m", 7},
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 8},
}

func TestSmartWrap(t *testing.T) {
for i, tc := range smartWrapCases {
func TestWrap(t *testing.T) {
for i, tc := range wrapCases {
t.Run(tc.name, func(t *testing.T) {
output := ansi.Wrap(tc.input, tc.width, "")
if output != tc.expected {
t.Errorf("case %d, expected %q, got %q", i+1, tc.expected, output)
t.Errorf("case %d, input %q, expected %q, got %q", i+1, tc.input, tc.expected, output)
}
})
}
Expand Down
Loading