From 5b3147b8bb415192a53a05cb2312b840b79abf67 Mon Sep 17 00:00:00 2001
From: Ayman Bagabas <ayman.bagabas@gmail.com>
Date: Fri, 5 Apr 2024 14:46:18 +0300
Subject: [PATCH] fix: preserve spaces in ansi strings and account for
 breakpoints

Breakpoints are now respected and wrapped properly.
Support non-breaking spaces
---
 exp/term/ansi/wrap.go      | 106 +++++++++++++++++++++----------------
 exp/term/ansi/wrap_test.go |  14 +++--
 2 files changed, 71 insertions(+), 49 deletions(-)

diff --git a/exp/term/ansi/wrap.go b/exp/term/ansi/wrap.go
index c3c39132..c83e3483 100644
--- a/exp/term/ansi/wrap.go
+++ b/exp/term/ansi/wrap.go
@@ -9,6 +9,9 @@ import (
 	"github.com/rivo/uniseg"
 )
 
+// nbsp is a non-breaking space
+const nbsp = 0xA0
+
 // Hardwrap wraps a string or a block of text to a given line length, breaking
 // word boundaries. This will preserve ANSI escape codes and will account for
 // wide-characters in the string.
@@ -106,15 +109,12 @@ func Hardwrap(s string, limit int, preserveSpace bool) string {
 // breakpoints for word wrapping. A hyphen (-) is always considered a
 // breakpoint.
 //
-// Note: breakpoints must be a string of 1-cell wide rune character.
+// Note: breakpoints must be a string of 1-cell wide rune characters.
 func Wordwrap(s string, limit int, breakpoints string) string {
 	if limit < 1 {
 		return s
 	}
 
-	// Add a hyphen to the breakpoints
-	breakpoints += "-"
-
 	var (
 		cluster  []byte
 		buf      bytes.Buffer
@@ -163,9 +163,14 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 				i += len(cluster)
 
 				r, _ := utf8.DecodeRune(cluster)
-				if r != utf8.RuneError && unicode.IsSpace(r) {
+				if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
 					addWord()
 					space.WriteRune(r)
+				} else if bytes.ContainsAny(cluster, breakpoints) {
+					addSpace()
+					addWord()
+					buf.Write(cluster)
+					curWidth++
 				} else {
 					word.Write(cluster)
 					wordLen += width
@@ -197,6 +202,8 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 			case unicode.IsSpace(r):
 				addWord()
 				space.WriteByte(b[i])
+			case r == '-':
+				fallthrough
 			case runeContainsAny(r, breakpoints):
 				addSpace()
 				addWord()
@@ -233,34 +240,24 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 // of characters that are considered breakpoints for word wrapping. A hyphen
 // (-) is always considered a breakpoint.
 //
-// Note: breakpoints must be a string of 1-cell wide rune character.
+// Note: breakpoints must be a string of 1-cell wide rune characters.
 func Wrap(s string, limit int, breakpoints string) string {
 	if limit < 1 {
 		return s
 	}
 
-	// Add a hyphen to the breakpoints
-	breakpoints += "-"
-
 	var (
 		cluster  []byte
 		buf      bytes.Buffer
 		word     bytes.Buffer
 		space    bytes.Buffer
-		bpoint   bytes.Buffer
-		curWidth int
-		wordLen  int
+		curWidth int // written width of the line
+		wordLen  int // word buffer len without ANSI escape codes
 		gstate   = -1
 		pstate   = parser.GroundState // initial state
 		b        = []byte(s)
 	)
 
-	addBpoint := func() {
-		curWidth += bpoint.Len()
-		buf.Write(bpoint.Bytes())
-		bpoint.Reset()
-	}
-
 	addSpace := func() {
 		curWidth += space.Len()
 		buf.Write(space.Bytes())
@@ -268,7 +265,6 @@ func Wrap(s string, limit int, breakpoints string) string {
 	}
 
 	addWord := func() {
-		addBpoint()
 		if word.Len() == 0 {
 			return
 		}
@@ -298,23 +294,30 @@ func Wrap(s string, limit int, breakpoints string) string {
 				i += len(cluster)
 
 				r, _ := utf8.DecodeRune(cluster)
-				if r != utf8.RuneError && unicode.IsSpace(r) {
+				switch {
+				case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
 					addWord()
 					space.WriteRune(r)
-				} else if bytes.ContainsAny(cluster, breakpoints) {
+				case bytes.ContainsAny(cluster, breakpoints):
 					addSpace()
-					addWord()
-					buf.Write(cluster)
-					curWidth++
-				} else {
+					if curWidth+wordLen+width > limit {
+						word.Write(cluster)
+						wordLen += width
+					} else {
+						addWord()
+						buf.Write(cluster)
+						curWidth += width
+					}
+				default:
 					if wordLen+width > limit {
-						// If the word is longer than the limit, we break it
+						// Hardwrap the word if it's too long
 						addWord()
 					}
+
 					word.Write(cluster)
 					wordLen += width
-					if curWidth+space.Len()+wordLen+bpoint.Len() > limit {
-						addBpoint()
+
+					if curWidth+wordLen+space.Len() > limit {
 						addNewline()
 					}
 				}
@@ -322,15 +325,16 @@ func Wrap(s string, limit int, breakpoints string) string {
 				pstate = parser.GroundState
 				continue
 			}
+
 			fallthrough
 		case parser.ExecuteAction:
-			r := rune(b[i])
-			switch {
+			switch r := rune(b[i]); {
 			case r == '\n':
 				if wordLen == 0 {
 					if curWidth+space.Len() > limit {
 						curWidth = 0
 					} else {
+						// preserve whitespaces
 						buf.Write(space.Bytes())
 					}
 					space.Reset()
@@ -340,26 +344,31 @@ func Wrap(s string, limit int, breakpoints string) string {
 				addNewline()
 			case unicode.IsSpace(r):
 				addWord()
-				space.WriteByte(b[i])
+				space.WriteRune(r)
+			case r == '-':
+				fallthrough
 			case runeContainsAny(r, breakpoints):
 				addSpace()
-				addWord()
-				if curWidth+1 <= limit {
-					bpoint.WriteByte(b[i])
-					break
+				if curWidth+wordLen+1 > limit {
+					// We can't fit the breakpoint in the current line, treat
+					// it as part of the word.
+					word.WriteRune(r)
+					wordLen++
+				} else {
+					addWord()
+					buf.WriteRune(r)
+					curWidth++
 				}
-				// If we can't fit the breakpoint in the current line, we treat
-				// it as a word character.
-				fallthrough
 			default:
-				if wordLen >= limit {
-					// If the word is longer than the limit, we break it
+				word.WriteRune(r)
+				wordLen++
+
+				if wordLen == limit {
+					// Hardwrap the word if it's too long
 					addWord()
 				}
-				word.WriteByte(b[i])
-				wordLen++
-				if curWidth+space.Len()+wordLen+bpoint.Len() > limit {
-					addBpoint()
+
+				if curWidth+wordLen+space.Len() > limit {
 					addNewline()
 				}
 			}
@@ -375,7 +384,14 @@ func Wrap(s string, limit int, breakpoints string) string {
 		i++
 	}
 
-	addWord()
+	if word.Len() != 0 {
+		// Preserve ANSI wrapped spaces at the end of string
+		if curWidth+space.Len() > limit {
+			buf.WriteByte('\n')
+		}
+		addSpace()
+	}
+	buf.Write(word.Bytes())
 
 	return buf.String()
 }
diff --git a/exp/term/ansi/wrap_test.go b/exp/term/ansi/wrap_test.go
index 9a9a361c..026c3b5f 100644
--- a/exp/term/ansi/wrap_test.go
+++ b/exp/term/ansi/wrap_test.go
@@ -128,6 +128,12 @@ var wrapCases = []struct {
 		expected: "\x1B[38;2;249;38;114ma really\nlong\nstring\x1B[0m",
 		width:    10,
 	},
+	{
+		name:     "long style nbsp",
+		input:    "\x1B[38;2;249;38;114ma really\u00a0long string\x1B[0m",
+		expected: "\x1b[38;2;249;38;114ma\nreally\u00a0lon\ng string\x1b[0m",
+		width:    10,
+	},
 	{
 		name:     "longer",
 		input:    "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog.",
@@ -143,7 +149,7 @@ var wrapCases = []struct {
 	{
 		name:     "long input",
 		input:    "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on-the-rocks.",
-		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on\n-the-rocks.",
+		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-\non-the-rocks.",
 		width:    76,
 	},
 	{
@@ -165,16 +171,16 @@ var wrapCases = []struct {
 		width:    3,
 	},
 	{
+		// XXX: Should we preserve spaces on text wrapping?
 		name:     "extra space",
 		input:    "foo ",
 		expected: "foo",
 		width:    3,
 	},
 	{
-		// FIXME: invalid expected
 		name:     "extra space style",
 		input:    "\x1b[mfoo \x1b[m",
-		expected: "\x1b[mfoo \x1b[m",
+		expected: "\x1b[mfoo\n \x1b[m",
 		width:    3,
 	},
 	{
@@ -210,7 +216,7 @@ func TestWrap(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			output := ansi.Wrap(tc.input, tc.width, "")
 			if output != tc.expected {
-				t.Errorf("case %d, expected %q, got %q", i+1, tc.expected, output)
+				t.Errorf("case %d, input %q, expected %q, got %q", i+1, tc.input, tc.expected, output)
 			}
 		})
 	}