Skip to content

Commit

Permalink
txtpbfmt: add support for wrapping without wordwrap
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 664420715
  • Loading branch information
txtpbfmt-copybara-robot committed Aug 18, 2024
1 parent 70a5980 commit a619d35
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 7 deletions.
3 changes: 3 additions & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ type Node struct {
// If this is not empty, it means that formatting was disabled for this node and it contains the
// raw, unformatted node string.
Raw string
// Used when we want to break between the field name and values when a
// single-line node exceeds the requested wrap column.
PutSingleValueOnNextLine bool
}

// NodeLess is a sorting function that compares two *Nodes, possibly using the parent Node
Expand Down
2 changes: 2 additions & 0 deletions cmd/txtpbfmt/fmt.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var (
wrapStringsAtColumn = flag.Int("wrap_strings_at_column", 0, "Max columns for string field values. (0 means no wrap.)")
wrapHTMLStrings = flag.Bool("wrap_html_strings", false, "Wrap strings containing HTML tags. (Requires wrap_strings_at_column > 0.)")
wrapStringsAfterNewlines = flag.Bool("wrap_strings_after_newlines", false, "Wrap strings after newlines.")
wrapStringsWithoutWordwrap = flag.Bool("wrap_strings_without_wordwrap", false, "Wrap strings at the given column only.")
preserveAngleBrackets = flag.Bool("preserve_angle_brackets", false, "Preserve angle brackets instead of converting to curly braces.")
smartQuotes = flag.Bool("smart_quotes", false, "Use single quotes around strings that contain double but not single quotes.")
)
Expand Down Expand Up @@ -100,6 +101,7 @@ func main() {
WrapStringsAtColumn: *wrapStringsAtColumn,
WrapHTMLStrings: *wrapHTMLStrings,
WrapStringsAfterNewlines: *wrapStringsAfterNewlines,
WrapStringsWithoutWordwrap: *wrapStringsWithoutWordwrap,
PreserveAngleBrackets: *preserveAngleBrackets,
SmartQuotes: *smartQuotes,
Logger: logger,
Expand Down
72 changes: 65 additions & 7 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ type Config struct {
// Should not be used with other Wrap* options.
WrapStringsAfterNewlines bool

// Wrap strictly at the column instead of a word boundary.
WrapStringsWithoutWordwrap bool

// Whether angle brackets used instead of curly braces should be preserved
// when outputting a formatted textproto.
PreserveAngleBrackets bool
Expand Down Expand Up @@ -398,6 +401,8 @@ func addToConfig(metaComment string, c *Config) error {
c.WrapHTMLStrings = true
case "wrap_strings_after_newlines":
c.WrapStringsAfterNewlines = true
case "wrap_strings_without_wordwrap":
c.WrapStringsWithoutWordwrap = true
case "on": // This doesn't change the overall config.
case "off": // This doesn't change the overall config.
default:
Expand Down Expand Up @@ -1182,7 +1187,7 @@ func needsWrappingAtColumn(nd *ast.Node, depth int, c Config) bool {
// Only wrap strings
return false
}
if len(v.Value) > maxLength {
if len(v.Value) > maxLength || c.WrapStringsWithoutWordwrap {
return true
}
}
Expand All @@ -1196,17 +1201,41 @@ func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error {
// This function looks at the unquoted ast.Value.Value string (i.e., with each Value's wrapping
// quote chars removed). We need to remove these quotes, since otherwise they'll be re-flowed into
// the body of the text.
lengthBuffer := 4 // Even at depth 0 we have a 2-space indent and a pair of quotes
const lengthBuffer = 4 // Even at depth 0 we have a 2-space indent and a pair of quotes
maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces))

str, quote, err := unquote.Raw(nd)
if err != nil {
return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err)
}

// Remove one from the max length since a trailing space may be added below.
wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1)
lines := strings.Split(wrappedStr, "\n")
var lines []string
if c.WrapStringsWithoutWordwrap {
// https://protobuf.dev/reference/protobuf/textformat-spec/#string.
// String literals can contain octal, hex, unicode, and C-style escape
// sequences: \a \b \f \n \r \t \v \? \' \" \\
re := regexp.MustCompile(`\\[abfnrtv?\\'"]` +
`|\\[0-7]{1,3}` +
`|\\x[0-9a-fA-F]{1,2}` +
`|\\u[0-9a-fA-F]{4}` +
`|\\U000[0-9a-fA-F]{5}` +
`|\\U0010[0-9a-fA-F]{4}` +
`|.`)
var line strings.Builder
for _, t := range re.FindAllString(str, -1) {
if line.Len()+len(t) > maxLength {
lines = append(lines, line.String())
line.Reset()
}
line.WriteString(t)
}
lines = append(lines, line.String())
} else {
// Remove one from the max length since a trailing space may be added below.
wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1)
lines = strings.Split(wrappedStr, "\n")
}

newValues := make([]*ast.Value, 0, len(lines))
// The Value objects have more than just the string in them. They also have any leading and
// trailing comments. To maintain these comments we recycle the existing Value objects if
Expand All @@ -1220,9 +1249,33 @@ func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error {
} else {
v = &ast.Value{}
}
if i < len(lines)-1 {

if !c.WrapStringsWithoutWordwrap && i < len(lines)-1 {
line = line + " "
}

if c.WrapStringsWithoutWordwrap {
var lineLength = len(line)
if v.InlineComment != "" {
lineLength += len(indentSpaces) + len(v.InlineComment)
}
// A single string is emitted on the same line as the field name, so count
// the name's length as well.
if i == 0 && len(lines) == 1 {
lineLength += len(nd.Name)
}
if lineLength > maxLength {
// If there's an inline comment, promote it to a pre-comment which will
// emit a newline.
if v.InlineComment != "" {
v.PreComments = append(v.PreComments, v.InlineComment)
v.InlineComment = ""
} else if i == 0 && len(v.PreComments) == 0 {
// It's too long and we don't have any comments.
nd.PutSingleValueOnNextLine = true
}
}
}

v.Value = fmt.Sprintf(`%c%s%c`, quote, line, quote)
newValues = append(newValues, v)
}
Expand Down Expand Up @@ -1586,7 +1639,11 @@ func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListIt
// metadata: { ... }
// In other cases, there is a newline right after the colon, so no space required.
if nd.Children != nil || (len(nd.Values) == 1 && len(nd.Values[0].PreComments) == 0) || nd.ValuesAsList {
f.WriteString(" ")
if nd.PutSingleValueOnNextLine {
f.WriteString("\n" + indent + indentSpaces)
} else {
f.WriteString(" ")
}
}
}

Expand All @@ -1595,6 +1652,7 @@ func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListIt
} else if len(nd.Values) > 0 {
f.writeValues(nd, nd.Values, indent+indentSpaces)
}

if nd.Children != nil { // Also for 0 Children.
if nd.ChildrenAsList {
f.writeChildrenAsListItems(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine)
Expand Down
68 changes: 68 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2532,6 +2532,74 @@ s2: '''six seven \neight nine'''
" cccccccccc \n"
" dddddddddd \n"
" eeeeeeeeee\x00 \n"
`,
}, {
name: "WrapStringsAtColumn_noWordwrap",
config: Config{
WrapStringsAtColumn: 12,
WrapStringsWithoutWordwrap: true,
},
in: `# 3456789012
s: "Curabitur\040elit\x20nec mi egestas,\u000Dtincidunt \U00010309nterdum elit porta.\n"
`,
out: `# 3456789012
s:
"Curabitu"
"r\040eli"
"t\x20nec"
" mi eges"
"tas,"
"\u000Dti"
"ncidunt "
"\U00010309"
"nterdum "
"elit por"
"ta.\n"
`,
}, {
name: "WrapStringsAtColumn_noWordwrapDeep",
config: Config{
WrapStringsAtColumn: 12,
WrapStringsWithoutWordwrap: true,
},
in: `
this_field_name_displays_wider_than_the_twelve_requested: "this_goes_to_a_new_line"
`,
out: `this_field_name_displays_wider_than_the_twelve_requested:
"this_goe"
"s_to_a_n"
"ew_line"
`,
}, {
name: "WrapStringsAtColumn_noWordwrapDeepInlinePromotion",
config: Config{
WrapStringsAtColumn: 12,
WrapStringsWithoutWordwrap: true,
},
in: `
this_field_name_displays_wider_than_the_twelve_requested: "0C" # XII
`,
out: `this_field_name_displays_wider_than_the_twelve_requested:
# XII
"0C"
`,
}, {
name: "WrapStringsAtColumn_noWordwrapMetacomment",
in: `# txtpbfmt: wrap_strings_at_column=12
# txtpbfmt: wrap_strings_without_wordwrap
# 3456789012
s: "1\tone\r\n2\ttwo\r\n3\tthree\r\n4\tfour\r\n"
`,
out: `# txtpbfmt: wrap_strings_at_column=12
# txtpbfmt: wrap_strings_without_wordwrap
# 3456789012
s:
"1\tone\r"
"\n2\ttwo"
"\r\n3\tt"
"hree\r\n"
"4\tfour"
"\r\n"
`,
}, {
name: "PreserveAngleBrackets",
Expand Down

0 comments on commit a619d35

Please sign in to comment.