diff --git a/ast/ast.go b/ast/ast.go index 1f24c6b..e673cff 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -74,6 +74,9 @@ type Node struct { // If this is not empty, it means that formatting was disabled for this node and it contains the // raw, unformatted node string. Raw string + // Used when we want to break between the field name and values when a + // single-line node exceeds the requested wrap column. + PutSingleValueOnNextLine bool } // NodeLess is a sorting function that compares two *Nodes, possibly using the parent Node diff --git a/cmd/txtpbfmt/fmt.go b/cmd/txtpbfmt/fmt.go index d369a39..2590707 100644 --- a/cmd/txtpbfmt/fmt.go +++ b/cmd/txtpbfmt/fmt.go @@ -30,6 +30,7 @@ var ( wrapStringsAtColumn = flag.Int("wrap_strings_at_column", 0, "Max columns for string field values. (0 means no wrap.)") wrapHTMLStrings = flag.Bool("wrap_html_strings", false, "Wrap strings containing HTML tags. (Requires wrap_strings_at_column > 0.)") wrapStringsAfterNewlines = flag.Bool("wrap_strings_after_newlines", false, "Wrap strings after newlines.") + wrapStringsWithoutWordwrap = flag.Bool("wrap_strings_without_wordwrap", false, "Wrap strings at the given column only.") preserveAngleBrackets = flag.Bool("preserve_angle_brackets", false, "Preserve angle brackets instead of converting to curly braces.") smartQuotes = flag.Bool("smart_quotes", false, "Use single quotes around strings that contain double but not single quotes.") ) @@ -100,6 +101,7 @@ func main() { WrapStringsAtColumn: *wrapStringsAtColumn, WrapHTMLStrings: *wrapHTMLStrings, WrapStringsAfterNewlines: *wrapStringsAfterNewlines, + WrapStringsWithoutWordwrap: *wrapStringsWithoutWordwrap, PreserveAngleBrackets: *preserveAngleBrackets, SmartQuotes: *smartQuotes, Logger: logger, diff --git a/parser/parser.go b/parser/parser.go index 0fdbab0..33e0dc5 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -73,6 +73,9 @@ type Config struct { // Should not be used with other Wrap* options. WrapStringsAfterNewlines bool + // Wrap strictly at the column instead of a word boundary. + WrapStringsWithoutWordwrap bool + // Whether angle brackets used instead of curly braces should be preserved // when outputting a formatted textproto. PreserveAngleBrackets bool @@ -398,6 +401,8 @@ func addToConfig(metaComment string, c *Config) error { c.WrapHTMLStrings = true case "wrap_strings_after_newlines": c.WrapStringsAfterNewlines = true + case "wrap_strings_without_wordwrap": + c.WrapStringsWithoutWordwrap = true case "on": // This doesn't change the overall config. case "off": // This doesn't change the overall config. default: @@ -1182,7 +1187,7 @@ func needsWrappingAtColumn(nd *ast.Node, depth int, c Config) bool { // Only wrap strings return false } - if len(v.Value) > maxLength { + if len(v.Value) > maxLength || c.WrapStringsWithoutWordwrap { return true } } @@ -1196,7 +1201,7 @@ func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error { // This function looks at the unquoted ast.Value.Value string (i.e., with each Value's wrapping // quote chars removed). We need to remove these quotes, since otherwise they'll be re-flowed into // the body of the text. - lengthBuffer := 4 // Even at depth 0 we have a 2-space indent and a pair of quotes + const lengthBuffer = 4 // Even at depth 0 we have a 2-space indent and a pair of quotes maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces)) str, quote, err := unquote.Raw(nd) @@ -1204,9 +1209,33 @@ func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error { return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err) } - // Remove one from the max length since a trailing space may be added below. - wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1) - lines := strings.Split(wrappedStr, "\n") + var lines []string + if c.WrapStringsWithoutWordwrap { + // https://protobuf.dev/reference/protobuf/textformat-spec/#string. + // String literals can contain octal, hex, unicode, and C-style escape + // sequences: \a \b \f \n \r \t \v \? \' \"\ ? \\ + re := regexp.MustCompile(`\\[abfnrtv?\\'"]` + + `|\\[0-7]{1,3}` + + `|\\x[0-9a-fA-F]{1,2}` + + `|\\u[0-9a-fA-F]{4}` + + `|\\U000[0-9a-fA-F]{5}` + + `|\\U0010[0-9a-fA-F]{4}` + + `|.`) + var line strings.Builder + for _, t := range re.FindAllString(str, -1) { + if line.Len()+len(t) > maxLength { + lines = append(lines, line.String()) + line.Reset() + } + line.WriteString(t) + } + lines = append(lines, line.String()) + } else { + // Remove one from the max length since a trailing space may be added below. + wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1) + lines = strings.Split(wrappedStr, "\n") + } + newValues := make([]*ast.Value, 0, len(lines)) // The Value objects have more than just the string in them. They also have any leading and // trailing comments. To maintain these comments we recycle the existing Value objects if @@ -1220,9 +1249,33 @@ func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error { } else { v = &ast.Value{} } - if i < len(lines)-1 { + + if !c.WrapStringsWithoutWordwrap && i < len(lines)-1 { line = line + " " } + + if c.WrapStringsWithoutWordwrap { + var lineLength = len(line) + if v.InlineComment != "" { + lineLength += len(indentSpaces) + len(v.InlineComment) + } + // field name and field value are inlined for single strings, adjust for that. + if i == 0 && len(lines) == 1 { + lineLength += len(nd.Name) + } + if lineLength > maxLength { + // If there's an inline comment, promote it to a pre-comment which will + // emit a newline. + if v.InlineComment != "" { + v.PreComments = append(v.PreComments, v.InlineComment) + v.InlineComment = "" + } else if i == 0 && len(v.PreComments) == 0 { + // It's too long and we don't have any comments. + nd.PutSingleValueOnNextLine = true + } + } + } + v.Value = fmt.Sprintf(`%c%s%c`, quote, line, quote) newValues = append(newValues, v) } @@ -1586,7 +1639,11 @@ func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListIt // metadata: { ... } // In other cases, there is a newline right after the colon, so no space required. if nd.Children != nil || (len(nd.Values) == 1 && len(nd.Values[0].PreComments) == 0) || nd.ValuesAsList { - f.WriteString(" ") + if nd.PutSingleValueOnNextLine { + f.WriteString("\n" + indent + indentSpaces) + } else { + f.WriteString(" ") + } } } @@ -1595,6 +1652,7 @@ func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListIt } else if len(nd.Values) > 0 { f.writeValues(nd, nd.Values, indent+indentSpaces) } + if nd.Children != nil { // Also for 0 Children. if nd.ChildrenAsList { f.writeChildrenAsListItems(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine) diff --git a/parser/parser_test.go b/parser/parser_test.go index 7e0fa50..06a0005 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2532,6 +2532,74 @@ s2: '''six seven \neight nine''' " cccccccccc \n" " dddddddddd \n" " eeeeeeeeee\x00 \n" +`, + }, { + name: "WrapStringsAtColumn_noWordwrap", + config: Config{ + WrapStringsAtColumn: 12, + WrapStringsWithoutWordwrap: true, + }, + in: `# 3456789012 +s: "Curabitur\040elit\x20nec mi egestas,\u000Dtincidunt \U00010309nterdum elit porta.\n" +`, + out: `# 3456789012 +s: + "Curabitu" + "r\040eli" + "t\x20nec" + " mi eges" + "tas," + "\u000Dti" + "ncidunt " + "\U00010309" + "nterdum " + "elit por" + "ta.\n" +`, + }, { + name: "WrapStringsAtColumn_noWordwrapDeep", + config: Config{ + WrapStringsAtColumn: 12, + WrapStringsWithoutWordwrap: true, + }, + in: ` +this_field_name_displays_wider_than_the_twelve_requested: "this_goes_to_a_new_line" +`, + out: `this_field_name_displays_wider_than_the_twelve_requested: + "this_goe" + "s_to_a_n" + "ew_line" +`, + }, { + name: "WrapStringsAtColumn_noWordwrapDeepInlinePromotion", + config: Config{ + WrapStringsAtColumn: 12, + WrapStringsWithoutWordwrap: true, + }, + in: ` +this_field_name_displays_wider_than_the_twelve_requested: "0C" # XII +`, + out: `this_field_name_displays_wider_than_the_twelve_requested: + # XII + "0C" +`, + }, { + name: "WrapStringsAtColumn_noWordwrapMetacomment", + in: `# txtpbfmt: wrap_strings_at_column=12 +# txtpbfmt: wrap_strings_without_wordwrap +# 3456789012 +s: "1\tone\r\n2\ttwo\r\n3\tthree\r\n4\tfour\r\n" +`, + out: `# txtpbfmt: wrap_strings_at_column=12 +# txtpbfmt: wrap_strings_without_wordwrap +# 3456789012 +s: + "1\tone\r" + "\n2\ttwo" + "\r\n3\tt" + "hree\r\n" + "4\tfour" + "\r\n" `, }, { name: "PreserveAngleBrackets",