Skip to content

Commit

Permalink
fix(term): ansi: account for some wrap edge cases (#59)
Browse files Browse the repository at this point in the history
* fix(term): ansi: account for some wrap edge cases

Properly count escape codes, handle breakpoints better, and only break a
word or breakpoint when necessary.

Fixes: #58

* Update wrap.go

* Update wrap.go

* Update wrap.go

* Update wrap.go

* wip

* fix

* fix: preserve spaces in ansi strings and account for breakpoints

Breakpoints are now respected and wrapped properly.
Non-breaking spaces are now supported.

* Update wrap.go
  • Loading branch information
aymanbagabas authored Apr 8, 2024
1 parent 07f093a commit c2c5fe8
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 48 deletions.
102 changes: 61 additions & 41 deletions exp/term/ansi/wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import (
"github.com/rivo/uniseg"
)

// nbsp is a non-breaking space
const nbsp = 0xA0

// Hardwrap wraps a string or a block of text to a given line length, breaking
// word boundaries. This will preserve ANSI escape codes and will account for
// wide-characters in the string.
Expand Down Expand Up @@ -105,14 +108,13 @@ func Hardwrap(s string, limit int, preserveSpace bool) string {
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func Wordwrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}

// Add a hyphen to the breakpoints
breakpoints += "-"

var (
cluster []byte
buf bytes.Buffer
Expand All @@ -135,6 +137,7 @@ func Wordwrap(s string, limit int, breakpoints string) string {
if word.Len() == 0 {
return
}

addSpace()
curWidth += wordLen
buf.Write(word.Bytes())
Expand All @@ -160,7 +163,7 @@ func Wordwrap(s string, limit int, breakpoints string) string {
i += len(cluster)

r, _ := utf8.DecodeRune(cluster)
if r != utf8.RuneError && unicode.IsSpace(r) {
if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
addWord()
space.WriteRune(r)
} else if bytes.ContainsAny(cluster, breakpoints) {
Expand Down Expand Up @@ -199,6 +202,8 @@ func Wordwrap(s string, limit int, breakpoints string) string {
case unicode.IsSpace(r):
addWord()
space.WriteByte(b[i])
case r == '-':
fallthrough
case runeContainsAny(r, breakpoints):
addSpace()
addWord()
Expand Down Expand Up @@ -234,21 +239,20 @@ func Wordwrap(s string, limit int, breakpoints string) string {
// account for wide-characters in the string. The breakpoints string is a list
// of characters that are considered breakpoints for word wrapping. A hyphen
// (-) is always considered a breakpoint.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func Wrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}

// Add a hyphen to the breakpoints
breakpoints += "-"

var (
cluster []byte
buf bytes.Buffer
word bytes.Buffer
space bytes.Buffer
curWidth int
wordLen int
curWidth int // written width of the line
wordLen int // word buffer len without ANSI escape codes
gstate = -1
pstate = parser.GroundState // initial state
b = []byte(s)
Expand All @@ -264,6 +268,7 @@ func Wrap(s string, limit int, breakpoints string) string {
if word.Len() == 0 {
return
}

addSpace()
curWidth += wordLen
buf.Write(word.Bytes())
Expand All @@ -289,44 +294,47 @@ func Wrap(s string, limit int, breakpoints string) string {
i += len(cluster)

r, _ := utf8.DecodeRune(cluster)
if r != utf8.RuneError && unicode.IsSpace(r) {
switch {
case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
addWord()
space.WriteRune(r)
} else if bytes.ContainsAny(cluster, breakpoints) {
case bytes.ContainsAny(cluster, breakpoints):
addSpace()
addWord()
buf.Write(cluster)
curWidth++
} else {
if curWidth+wordLen+width > limit {
word.Write(cluster)
wordLen += width
} else {
addWord()
buf.Write(cluster)
curWidth += width
}
default:
if wordLen+width > limit {
// Hardwrap the word if it's too long
addWord()
addNewline()
}

word.Write(cluster)
wordLen += width
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {

if curWidth+wordLen+space.Len() > limit {
addNewline()
} else if curWidth+wordLen >= limit {
addWord()
if i < len(b)-1 {
addNewline()
}
}
}

pstate = parser.GroundState
continue
}

fallthrough
case parser.ExecuteAction:
r := rune(b[i])
switch {
switch r := rune(b[i]); {
case r == '\n':
if wordLen == 0 {
if curWidth+space.Len() > limit {
curWidth = 0
} else {
// preserve whitespaces
buf.Write(space.Bytes())
}
space.Reset()
Expand All @@ -336,27 +344,32 @@ func Wrap(s string, limit int, breakpoints string) string {
addNewline()
case unicode.IsSpace(r):
addWord()
space.WriteByte(b[i])
space.WriteRune(r)
case r == '-':
fallthrough
case runeContainsAny(r, breakpoints):
addSpace()
addWord()
buf.WriteByte(b[i])
curWidth++
default:
if wordLen+1 > limit {
if curWidth+wordLen >= limit {
// We can't fit the breakpoint in the current line, treat
// it as part of the word.
word.WriteRune(r)
wordLen++
} else {
addWord()
addNewline()
buf.WriteRune(r)
curWidth++
}
word.WriteByte(b[i])
default:
word.WriteRune(r)
wordLen++
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {
addNewline()
} else if curWidth+wordLen >= limit {

if wordLen == limit {
// Hardwrap the word if it's too long
addWord()
if i < len(b)-1 {
addNewline()
}
}

if curWidth+wordLen+space.Len() > limit {
addNewline()
}
}

Expand All @@ -371,7 +384,14 @@ func Wrap(s string, limit int, breakpoints string) string {
i++
}

addWord()
if word.Len() != 0 {
// Preserve ANSI wrapped spaces at the end of string
if curWidth+space.Len() > limit {
buf.WriteByte('\n')
}
addSpace()
}
buf.Write(word.Bytes())

return buf.String()
}
Expand Down
65 changes: 58 additions & 7 deletions exp/term/ansi/wrap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ var cases = []struct {
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
}

func TestWrap(t *testing.T) {
func TestHardwrap(t *testing.T) {
for i, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
if got := ansi.Hardwrap(tt.input, tt.limit, tt.preserveSpace); got != tt.expected {
Expand Down Expand Up @@ -92,7 +92,7 @@ func TestWrapWordwrap(t *testing.T) {
}
}

var smartWrapCases = []struct {
var wrapCases = []struct {
name string
input string
expected string
Expand Down Expand Up @@ -128,6 +128,12 @@ var smartWrapCases = []struct {
expected: "\x1B[38;2;249;38;114ma really\nlong\nstring\x1B[0m",
width: 10,
},
{
name: "long style nbsp",
input: "\x1B[38;2;249;38;114ma really\u00a0long string\x1B[0m",
expected: "\x1b[38;2;249;38;114ma\nreally\u00a0lon\ng string\x1b[0m",
width: 10,
},
{
name: "longer",
input: "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog.",
Expand All @@ -143,29 +149,74 @@ var smartWrapCases = []struct {
{
name: "long input",
input: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on-the-rocks.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on\n-the-rocks.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-\non-the-rocks.",
width: 76,
},
{
name: "long input2",
input: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operating-system.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operat\ning-system.",
expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-\noperating-system.",
width: 76,
},
{
name: "hyphen breakpoint",
input: "a-good-offensive-cheat-code",
expected: "a-good-\noffensive-\ncheat-code",
width: 10,
},
{
name: "exact",
input: "\x1b[91mfoo\x1b[0",
expected: "\x1b[91mfoo\x1b[0",
width: 3,
},
{
// XXX: Should we preserve spaces on text wrapping?
name: "extra space",
input: "foo ",
expected: "foo",
width: 3,
},
{
name: "extra space style",
input: "\x1b[mfoo \x1b[m",
expected: "\x1b[mfoo\n \x1b[m",
width: 3,
},
{
name: "paragraph with styles",
input: "Lorem ipsum dolor \x1b[1msit\x1b[m amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. \x1b[31mUt enim\x1b[m ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea \x1b[38;5;200mcommodo consequat\x1b[m. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. \x1b[1;2;33mExcepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\x1b[m",
expected: "Lorem ipsum dolor \x1b[1msit\x1b[m amet,\nconsectetur adipiscing elit,\nsed do eiusmod tempor\nincididunt ut labore et dolore\nmagna aliqua. \x1b[31mUt enim\x1b[m ad minim\nveniam, quis nostrud\nexercitation ullamco laboris\nnisi ut aliquip ex ea \x1b[38;5;200mcommodo\nconsequat\x1b[m. Duis aute irure\ndolor in reprehenderit in\nvoluptate velit esse cillum\ndolore eu fugiat nulla\npariatur. \x1b[1;2;33mExcepteur sint\noccaecat cupidatat non\nproident, sunt in culpa qui\nofficia deserunt mollit anim\nid est laborum.\x1b[m",
width: 30,
},
{"hyphen break", "foo-bar", "foo-\nbar", 5},
{"double space", "f bar foobaz", "f bar\nfoobaz", 6},
{"passthrough", "foobar\n ", "foobar\n ", 0},
{"pass", "foo", "foo", 3},
{"toolong", "foobarfoo", "foob\narfo\no", 4},
{"white space", "foo bar foo", "foo\nbar\nfoo", 4},
{"broken_at_spaces", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
{"hyphen", "foob-foobar", "foob\n-foo\nbar", 4},
{"wide_emoji_breakpoint", "foo🫧 foobar", "foo\n🫧\nfoob\nar", 4},
{"space_breakpoint", "foo --bar", "foo --bar", 9},
{"simple", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
{"limit", "foo bar", "foo\nbar", 5},
{"remove white spaces", "foo \nb ar ", "foo\nb\nar", 4},
{"white space trail width", "foo\nb\t a\n bar", "foo\nb\t a\n bar", 4},
{"explicit_line_break", "foo bar foo\n", "foo\nbar\nfoo\n", 4},
{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", "\nfoo\nbar\n\n\nfoo\n", 4},
{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo \nbar ", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar", 6},
{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7},
{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", "\x1b[38;2;249;38;114m(\x1b[0m\x1b[38;2;248;248;242mjust\nanother\ntest\x1b[38;2;249;38;114m)\x1b[0m", 7},
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 8},
}

func TestSmartWrap(t *testing.T) {
for i, tc := range smartWrapCases {
func TestWrap(t *testing.T) {
for i, tc := range wrapCases {
t.Run(tc.name, func(t *testing.T) {
output := ansi.Wrap(tc.input, tc.width, "")
if output != tc.expected {
t.Errorf("case %d, expected %q, got %q", i+1, tc.expected, output)
t.Errorf("case %d, input %q, expected %q, got %q", i+1, tc.input, tc.expected, output)
}
})
}
Expand Down

0 comments on commit c2c5fe8

Please sign in to comment.