fix(term): ansi: account for some wrap edge cases (#59)

* fix(term): ansi: account for some wrap edge cases

  Properly count escape codes, better handling of breakpoints, and only break word/breakpoint when necessary.

  Fixes: #58

* Update wrap.go
* Update wrap.go
* Update wrap.go
* Update wrap.go
* wip
* fix
* fix: preserve spaces in ansi strings and account for breakpoints

  Breakpoints are now respected and wrapped properly.
  Support non-breaking spaces

* Update wrap.go
charmbracelet · Apr 8, 2024 · c2c5fe8 · c2c5fe8
1 parent 07f093a
commit c2c5fe8
Show file tree

Hide file tree

Showing 2 changed files with 119 additions and 48 deletions.
diff --git a/exp/term/ansi/wrap.go b/exp/term/ansi/wrap.go
@@ -9,6 +9,9 @@ import (
 	"github.com/rivo/uniseg"
 )
 
+// nbsp is a non-breaking space
+const nbsp = 0xA0
+
 // Hardwrap wraps a string or a block of text to a given line length, breaking
 // word boundaries. This will preserve ANSI escape codes and will account for
 // wide-characters in the string.
@@ -105,14 +108,13 @@ func Hardwrap(s string, limit int, preserveSpace bool) string {
 // The breakpoints string is a list of characters that are considered
 // breakpoints for word wrapping. A hyphen (-) is always considered a
 // breakpoint.
+//
+// Note: breakpoints must be a string of 1-cell wide rune characters.
 func Wordwrap(s string, limit int, breakpoints string) string {
 	if limit < 1 {
 		return s
 	}
 
-	// Add a hyphen to the breakpoints
-	breakpoints += "-"
-
 	var (
 		cluster  []byte
 		buf      bytes.Buffer
@@ -135,6 +137,7 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 		if word.Len() == 0 {
 			return
 		}
+
 		addSpace()
 		curWidth += wordLen
 		buf.Write(word.Bytes())
@@ -160,7 +163,7 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 				i += len(cluster)
 
 				r, _ := utf8.DecodeRune(cluster)
-				if r != utf8.RuneError && unicode.IsSpace(r) {
+				if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
 					addWord()
 					space.WriteRune(r)
 				} else if bytes.ContainsAny(cluster, breakpoints) {
@@ -199,6 +202,8 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 			case unicode.IsSpace(r):
 				addWord()
 				space.WriteByte(b[i])
+			case r == '-':
+				fallthrough
 			case runeContainsAny(r, breakpoints):
 				addSpace()
 				addWord()
@@ -234,21 +239,20 @@ func Wordwrap(s string, limit int, breakpoints string) string {
 // account for wide-characters in the string. The breakpoints string is a list
 // of characters that are considered breakpoints for word wrapping. A hyphen
 // (-) is always considered a breakpoint.
+//
+// Note: breakpoints must be a string of 1-cell wide rune characters.
 func Wrap(s string, limit int, breakpoints string) string {
 	if limit < 1 {
 		return s
 	}
 
-	// Add a hyphen to the breakpoints
-	breakpoints += "-"
-
 	var (
 		cluster  []byte
 		buf      bytes.Buffer
 		word     bytes.Buffer
 		space    bytes.Buffer
-		curWidth int
-		wordLen  int
+		curWidth int // written width of the line
+		wordLen  int // word buffer len without ANSI escape codes
 		gstate   = -1
 		pstate   = parser.GroundState // initial state
 		b        = []byte(s)
@@ -264,6 +268,7 @@ func Wrap(s string, limit int, breakpoints string) string {
 		if word.Len() == 0 {
 			return
 		}
+
 		addSpace()
 		curWidth += wordLen
 		buf.Write(word.Bytes())
@@ -289,44 +294,47 @@ func Wrap(s string, limit int, breakpoints string) string {
 				i += len(cluster)
 
 				r, _ := utf8.DecodeRune(cluster)
-				if r != utf8.RuneError && unicode.IsSpace(r) {
+				switch {
+				case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
 					addWord()
 					space.WriteRune(r)
-				} else if bytes.ContainsAny(cluster, breakpoints) {
+				case bytes.ContainsAny(cluster, breakpoints):
 					addSpace()
-					addWord()
-					buf.Write(cluster)
-					curWidth++
-				} else {
+					if curWidth+wordLen+width > limit {
+						word.Write(cluster)
+						wordLen += width
+					} else {
+						addWord()
+						buf.Write(cluster)
+						curWidth += width
+					}
+				default:
 					if wordLen+width > limit {
+						// Hardwrap the word if it's too long
 						addWord()
-						addNewline()
 					}
+
 					word.Write(cluster)
 					wordLen += width
-					if curWidth+space.Len()+wordLen > limit &&
-						wordLen < limit {
+
+					if curWidth+wordLen+space.Len() > limit {
 						addNewline()
-					} else if curWidth+wordLen >= limit {
-						addWord()
-						if i < len(b)-1 {
-							addNewline()
-						}
 					}
 				}
 
 				pstate = parser.GroundState
 				continue
 			}
+
 			fallthrough
 		case parser.ExecuteAction:
-			r := rune(b[i])
-			switch {
+			switch r := rune(b[i]); {
 			case r == '\n':
 				if wordLen == 0 {
 					if curWidth+space.Len() > limit {
 						curWidth = 0
 					} else {
+						// preserve whitespaces
 						buf.Write(space.Bytes())
 					}
 					space.Reset()
@@ -336,27 +344,32 @@ func Wrap(s string, limit int, breakpoints string) string {
 				addNewline()
 			case unicode.IsSpace(r):
 				addWord()
-				space.WriteByte(b[i])
+				space.WriteRune(r)
+			case r == '-':
+				fallthrough
 			case runeContainsAny(r, breakpoints):
 				addSpace()
-				addWord()
-				buf.WriteByte(b[i])
-				curWidth++
-			default:
-				if wordLen+1 > limit {
+				if curWidth+wordLen >= limit {
+					// We can't fit the breakpoint in the current line, treat
+					// it as part of the word.
+					word.WriteRune(r)
+					wordLen++
+				} else {
 					addWord()
-					addNewline()
+					buf.WriteRune(r)
+					curWidth++
 				}
-				word.WriteByte(b[i])
+			default:
+				word.WriteRune(r)
 				wordLen++
-				if curWidth+space.Len()+wordLen > limit &&
-					wordLen < limit {
-					addNewline()
-				} else if curWidth+wordLen >= limit {
+
+				if wordLen == limit {
+					// Hardwrap the word if it's too long
 					addWord()
-					if i < len(b)-1 {
-						addNewline()
-					}
+				}
+
+				if curWidth+wordLen+space.Len() > limit {
+					addNewline()
 				}
 			}
 
@@ -371,7 +384,14 @@ func Wrap(s string, limit int, breakpoints string) string {
 		i++
 	}
 
-	addWord()
+	if word.Len() != 0 {
+		// Preserve ANSI wrapped spaces at the end of string
+		if curWidth+space.Len() > limit {
+			buf.WriteByte('\n')
+		}
+		addSpace()
+	}
+	buf.Write(word.Bytes())
 
 	return buf.String()
 }

diff --git a/exp/term/ansi/wrap_test.go b/exp/term/ansi/wrap_test.go
@@ -34,7 +34,7 @@ var cases = []struct {
 	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
 }
 
-func TestWrap(t *testing.T) {
+func TestHardwrap(t *testing.T) {
 	for i, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {
 			if got := ansi.Hardwrap(tt.input, tt.limit, tt.preserveSpace); got != tt.expected {
@@ -92,7 +92,7 @@ func TestWrapWordwrap(t *testing.T) {
 	}
 }
 
-var smartWrapCases = []struct {
+var wrapCases = []struct {
 	name     string
 	input    string
 	expected string
@@ -128,6 +128,12 @@ var smartWrapCases = []struct {
 		expected: "\x1B[38;2;249;38;114ma really\nlong\nstring\x1B[0m",
 		width:    10,
 	},
+	{
+		name:     "long style nbsp",
+		input:    "\x1B[38;2;249;38;114ma really\u00a0long string\x1B[0m",
+		expected: "\x1b[38;2;249;38;114ma\nreally\u00a0lon\ng string\x1b[0m",
+		width:    10,
+	},
 	{
 		name:     "longer",
 		input:    "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog.",
@@ -143,29 +149,74 @@ var smartWrapCases = []struct {
 	{
 		name:     "long input",
 		input:    "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on-the-rocks.",
-		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on\n-the-rocks.",
+		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-\non-the-rocks.",
 		width:    76,
 	},
 	{
 		name:     "long input2",
 		input:    "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operating-system.",
-		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operat\ning-system.",
+		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-\noperating-system.",
 		width:    76,
 	},
+	{
+		name:     "hyphen breakpoint",
+		input:    "a-good-offensive-cheat-code",
+		expected: "a-good-\noffensive-\ncheat-code",
+		width:    10,
+	},
+	{
+		name:     "exact",
+		input:    "\x1b[91mfoo\x1b[0",
+		expected: "\x1b[91mfoo\x1b[0",
+		width:    3,
+	},
+	{
+		// XXX: Should we preserve spaces on text wrapping?
+		name:     "extra space",
+		input:    "foo ",
+		expected: "foo",
+		width:    3,
+	},
+	{
+		name:     "extra space style",
+		input:    "\x1b[mfoo \x1b[m",
+		expected: "\x1b[mfoo\n \x1b[m",
+		width:    3,
+	},
 	{
 		name:     "paragraph with styles",
 		input:    "Lorem ipsum dolor \x1b[1msit\x1b[m amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. \x1b[31mUt enim\x1b[m ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea \x1b[38;5;200mcommodo consequat\x1b[m. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. \x1b[1;2;33mExcepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\x1b[m",
 		expected: "Lorem ipsum dolor \x1b[1msit\x1b[m amet,\nconsectetur adipiscing elit,\nsed do eiusmod tempor\nincididunt ut labore et dolore\nmagna aliqua. \x1b[31mUt enim\x1b[m ad minim\nveniam, quis nostrud\nexercitation ullamco laboris\nnisi ut aliquip ex ea \x1b[38;5;200mcommodo\nconsequat\x1b[m. Duis aute irure\ndolor in reprehenderit in\nvoluptate velit esse cillum\ndolore eu fugiat nulla\npariatur. \x1b[1;2;33mExcepteur sint\noccaecat cupidatat non\nproident, sunt in culpa qui\nofficia deserunt mollit anim\nid est laborum.\x1b[m",
 		width:    30,
 	},
+	{"hyphen break", "foo-bar", "foo-\nbar", 5},
+	{"double space", "f  bar foobaz", "f  bar\nfoobaz", 6},
+	{"passthrough", "foobar\n ", "foobar\n ", 0},
+	{"pass", "foo", "foo", 3},
+	{"toolong", "foobarfoo", "foob\narfo\no", 4},
+	{"white space", "foo bar foo", "foo\nbar\nfoo", 4},
+	{"broken_at_spaces", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
+	{"hyphen", "foob-foobar", "foob\n-foo\nbar", 4},
+	{"wide_emoji_breakpoint", "foo🫧 foobar", "foo\n🫧\nfoob\nar", 4},
+	{"space_breakpoint", "foo --bar", "foo --bar", 9},
+	{"simple", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
+	{"limit", "foo bar", "foo\nbar", 5},
+	{"remove white spaces", "foo    \nb   ar   ", "foo\nb\nar", 4},
+	{"white space trail width", "foo\nb\t a\n bar", "foo\nb\t a\n bar", 4},
+	{"explicit_line_break", "foo bar foo\n", "foo\nbar\nfoo\n", 4},
+	{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", "\nfoo\nbar\n\n\nfoo\n", 4},
+	{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo  \nbar    ", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar", 6},
+	{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7},
+	{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", "\x1b[38;2;249;38;114m(\x1b[0m\x1b[38;2;248;248;242mjust\nanother\ntest\x1b[38;2;249;38;114m)\x1b[0m", 7},
+	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 8},
 }
 
-func TestSmartWrap(t *testing.T) {
-	for i, tc := range smartWrapCases {
+func TestWrap(t *testing.T) {
+	for i, tc := range wrapCases {
 		t.Run(tc.name, func(t *testing.T) {
 			output := ansi.Wrap(tc.input, tc.width, "")
 			if output != tc.expected {
-				t.Errorf("case %d, expected %q, got %q", i+1, tc.expected, output)
+				t.Errorf("case %d, input %q, expected %q, got %q", i+1, tc.input, tc.expected, output)
 			}
 		})
 	}