From a4560cd20eecd7d1a01096d3370dbc35643797f2 Mon Sep 17 00:00:00 2001 From: "D. Bohdan" Date: Fri, 17 Jan 2025 11:58:42 +0000 Subject: [PATCH] feat(cli): add option `--multiline` Thanks to @alexvoss for suggesting how to implement this with current TOML Kit. --- README.md | 69 ++++++++++++++++++++++++++++++++++++----- src/remarshal/main.py | 46 +++++++++++++++++++++++++-- tests/multiline-3.toml | 32 +++++++++++++++++++ tests/multiline-5.toml | 19 ++++++++++++ tests/multiline.json | 9 ++++++ tests/multiline.toml | 7 +++++ tests/test_remarshal.py | 21 +++++++++++++ 7 files changed, 193 insertions(+), 10 deletions(-) create mode 100644 tests/multiline-3.toml create mode 100644 tests/multiline-5.toml create mode 100644 tests/multiline.json create mode 100644 tests/multiline.toml diff --git a/README.md b/README.md index 958ede9..76e111c 100644 --- a/README.md +++ b/README.md @@ -134,9 +134,9 @@ uv tool install https://github.com/remarshal-project/remarshal ```none usage: remarshal [-h] [-v] [-f {cbor,json,msgpack,toml,yaml}] [-i ] - [--indent ] [-k] [--max-values ] [-o ] [-s] - [-t {cbor,json,msgpack,python,toml,yaml}] [--unwrap ] - [--verbose] [--width ] [--wrap ] + [--indent ] [-k] [--max-values ] [--multiline ] + [-o ] [-s] [-t {cbor,json,msgpack,python,toml,yaml}] + [--unwrap ] [--verbose] [--width ] [--wrap ] [--yaml-style {,',",|,>}] [input] [output] @@ -158,6 +158,8 @@ options: keys and null values for TOML --max-values maximum number of values in input data (default 1000000, negative for unlimited) + --multiline minimum number of items to make non-nested TOML array + multiline (default 6) -o, --output output file -s, --sort-keys sort JSON, Python, and TOML keys instead of preserving @@ -286,7 +288,7 @@ $ curl -f 'https://archive-api.open-meteo.com/v1/era5?latitude=50.43&longitude=3 ; latitude = 50.439365 longitude = 30.476192 -generationtime_ms = 0.04208087921142578 +generationtime_ms = 0.03254413604736328 utc_offset_seconds = 0 timezone = "GMT" timezone_abbreviation = "GMT" @@ -297,14 +299,65 @@ time = "iso8601" temperature_2m = "°C" [hourly] -time = ["2014-10-05T00:00", "2014-10-05T01:00", "2014-10-05T02:00", "2014-10-05T03:00", "2014-10-05T04:00", "2014-10-05T05:00", "2014-10-05T06:00", "2014-10-05T07:00", "2014-10-05T08:00", "2014-10-05T09:00", "2014-10-05T10:00", "2014-10-05T11:00", "2014-10-05T12:00", "2014-10-05T13:00", "2014-10-05T14:00", "2014-10-05T15:00", "2014-10-05T16:00", "2014-10-05T17:00", "2014-10-05T18:00", "2014-10-05T19:00", "2014-10-05T20:00", "2014-10-05T21:00", "2014-10-05T22:00", "2014-10-05T23:00"] -temperature_2m = [5.7, 5.3, 5.0, 4.8, 4.6, 4.6, 7.0, 8.9, 10.8, 12.2, 13.3, 13.9, 13.9, 13.7, 13.3, 12.3, 11.1, 10.2, 9.4, 8.5, 8.2, 7.9, 8.0, 7.8] +time = [ + "2014-10-05T00:00", + "2014-10-05T01:00", + "2014-10-05T02:00", + "2014-10-05T03:00", + "2014-10-05T04:00", + "2014-10-05T05:00", + "2014-10-05T06:00", + "2014-10-05T07:00", + "2014-10-05T08:00", + "2014-10-05T09:00", + "2014-10-05T10:00", + "2014-10-05T11:00", + "2014-10-05T12:00", + "2014-10-05T13:00", + "2014-10-05T14:00", + "2014-10-05T15:00", + "2014-10-05T16:00", + "2014-10-05T17:00", + "2014-10-05T18:00", + "2014-10-05T19:00", + "2014-10-05T20:00", + "2014-10-05T21:00", + "2014-10-05T22:00", + "2014-10-05T23:00", +] +temperature_2m = [ + 5.7, + 5.3, + 5.0, + 4.8, + 4.6, + 4.6, + 7.0, + 8.9, + 10.8, + 12.2, + 13.3, + 13.9, + 13.9, + 13.7, + 13.3, + 12.3, + 11.1, + 10.2, + 9.4, + 8.5, + 8.2, + 7.9, + 8.0, + 7.8, +] ``` -Remarshal does not limit the line width in TOML. +Remarshal controls the number of items at which a TOML array becomes multiline, +but it does not control the line width. You can use [`taplo fmt`](https://taplo.tamasfe.dev/cli/usage/formatting.html) -to reformat the TOML and break up long lines with arrays. +for finer TOML formatting. ## License diff --git a/src/remarshal/main.py b/src/remarshal/main.py index a31d4bb..ecf8d31 100755 --- a/src/remarshal/main.py +++ b/src/remarshal/main.py @@ -30,6 +30,7 @@ import cbor2 # type: ignore import colorama import tomlkit +import tomlkit.items from rich_argparse import RichHelpFormatter try: @@ -58,6 +59,8 @@ class Defaults: PYTHON_INDENT = 1 YAML_INDENT = 2 + MULTILINE_THRESHOLD = 6 + WIDTH = 80 @@ -97,6 +100,7 @@ class PythonOptions(FormatOptions): @dataclass(frozen=True) class TOMLOptions(FormatOptions): indent: int | None = Defaults.INDENT + multiline_threshold: int = Defaults.MULTILINE_THRESHOLD sort_keys: bool = Defaults.SORT_KEYS stringify: bool = Defaults.STRINGIFY @@ -284,6 +288,18 @@ def _parse_command_line(argv: Sequence[str]) -> argparse.Namespace: ), ) + parser.add_argument( + "--multiline", + default=Defaults.MULTILINE_THRESHOLD, + dest="multiline_threshold", + metavar="", + type=int, + help=( + "minimum number of items to make non-nested TOML array multiline " + "(default %(default)s)" + ), + ) + output_group = parser.add_mutually_exclusive_group() output_group.add_argument("output", default="-", nargs="?", help="output file") output_group.add_argument( @@ -688,6 +704,7 @@ def _encode_python( def _encode_toml( data: Mapping[Any, Any], *, + multiline_threshold: int, sort_keys: bool, stringify: bool, ) -> str: @@ -709,14 +726,28 @@ def stringify_null(x: Any) -> Any: default_callback = stringify_null if stringify else reject_null try: - return tomlkit.dumps( + toml = tomlkit.item( traverse( data, key_callback=key_callback, default_callback=default_callback, ), - sort_keys=sort_keys, + _sort_keys=sort_keys, ) + + def multilinify(item: tomlkit.items.Item) -> None: + match item: + case tomlkit.items.Array(): + if len(item) >= multiline_threshold: + item.multiline(multiline=True) + + case tomlkit.items.AbstractTable(): + for value in item.values(): + multilinify(value) + + multilinify(toml) + + return toml.as_string() except AttributeError as e: if str(e) == "'list' object has no attribute 'as_string'": msg = ( @@ -769,6 +800,7 @@ def format_options( output_format: str, *, indent: int | None = None, + multiline_threshold: int = Defaults.MULTILINE_THRESHOLD, sort_keys: bool = False, stringify: bool = False, width: int = Defaults.WIDTH, @@ -797,6 +829,7 @@ def format_options( case "toml": return TOMLOptions( + multiline_threshold=multiline_threshold, sort_keys=sort_keys, stringify=stringify, ) @@ -824,12 +857,14 @@ def encode( if not isinstance(options, CBOROptions): msg = "expected 'options' argument to have class 'CBOROptions'" raise TypeError(msg) + encoded = _encode_cbor(data) case "json": if not isinstance(options, JSONOptions): msg = "expected 'options' argument to have class 'JSONOptions'" raise TypeError(msg) + encoded = _encode_json( data, indent=options.indent, @@ -841,12 +876,14 @@ def encode( if not isinstance(options, MsgPackOptions): msg = "expected 'options' argument to have class 'MsgPackOptions'" raise TypeError(msg) + encoded = _encode_msgpack(data) case "python": if not isinstance(options, PythonOptions): msg = "expected 'options' argument to have class 'PythonOptions'" raise TypeError(msg) + encoded = _encode_python( data, indent=options.indent, @@ -858,14 +895,17 @@ def encode( if not isinstance(options, TOMLOptions): msg = "expected 'options' argument to have class 'TOMLOptions'" raise TypeError(msg) + if not isinstance(data, Mapping): msg = ( f"Top-level value of type '{type(data).__name__}' cannot " "be encoded as TOML" ) raise TypeError(msg) + encoded = _encode_toml( data, + multiline_threshold=options.multiline_threshold, sort_keys=options.sort_keys, stringify=options.stringify, ).encode(UTF_8) @@ -874,6 +914,7 @@ def encode( if not isinstance(options, YAMLOptions): msg = "expected 'options' argument to have class 'YAMLOptions'" raise TypeError(msg) + encoded = _encode_yaml( data, indent=options.indent, @@ -959,6 +1000,7 @@ def main() -> None: options = format_options( args.output_format, indent=args.indent, + multiline_threshold=args.multiline_threshold, sort_keys=args.sort_keys, stringify=args.stringify, width=args.width, diff --git a/tests/multiline-3.toml b/tests/multiline-3.toml new file mode 100644 index 0000000..95133a5 --- /dev/null +++ b/tests/multiline-3.toml @@ -0,0 +1,32 @@ +foo = [ + 1, + 2, + [3], + 4, + 5, +] +bar = [ + 1, + [2, 3, 4, 5, 6], + 7, +] + +[baz] +qux = [ + 1, + 2, + 3, +] +quux = [ + 1, + 2, + 3, + 4, +] +quuux = [ + 1, + 2, + 3, + 4, + 5, +] diff --git a/tests/multiline-5.toml b/tests/multiline-5.toml new file mode 100644 index 0000000..a969423 --- /dev/null +++ b/tests/multiline-5.toml @@ -0,0 +1,19 @@ +foo = [ + 1, + 2, + [3], + 4, + 5, +] +bar = [1, [2, 3, 4, 5, 6], 7] + +[baz] +qux = [1, 2, 3] +quux = [1, 2, 3, 4] +quuux = [ + 1, + 2, + 3, + 4, + 5, +] diff --git a/tests/multiline.json b/tests/multiline.json new file mode 100644 index 0000000..232aa60 --- /dev/null +++ b/tests/multiline.json @@ -0,0 +1,9 @@ +{ + "foo": [1, 2, [3], 4, 5], + "bar": [1, [2, 3, 4, 5, 6], 7], + "baz": { + "qux": [1, 2, 3], + "quux": [1, 2, 3, 4], + "quuux": [1, 2, 3, 4, 5] + } +} diff --git a/tests/multiline.toml b/tests/multiline.toml new file mode 100644 index 0000000..03f06f2 --- /dev/null +++ b/tests/multiline.toml @@ -0,0 +1,7 @@ +foo = [1, 2, [3], 4, 5] +bar = [1, [2, 3, 4, 5, 6], 7] + +[baz] +qux = [1, 2, 3] +quux = [1, 2, 3, 4] +quuux = [1, 2, 3, 4, 5] diff --git a/tests/test_remarshal.py b/tests/test_remarshal.py index 221ce96..d0aa7b8 100755 --- a/tests/test_remarshal.py +++ b/tests/test_remarshal.py @@ -100,6 +100,7 @@ def _convert_and_read( # noqa: PLR0913 output_format: str, *, indent: int | None = None, + multiline_threshold: int = Defaults.MULTILINE_THRESHOLD, output_filename: str, sort_keys: bool = False, stringify: bool = False, @@ -113,6 +114,7 @@ def _convert_and_read( # noqa: PLR0913 options = remarshal.format_options( output_format, indent=indent, + multiline_threshold=multiline_threshold, sort_keys=sort_keys, stringify=stringify, width=width, @@ -221,6 +223,25 @@ def test_json2toml(self, convert_and_read) -> None: ) assert output_sig == reference_sig + def test_json2toml_multiline_default(self, convert_and_read) -> None: + output = convert_and_read("multiline.json", "json", "toml") + reference = read_file("multiline.toml") + assert output == reference + + def test_json2toml_multiline_3(self, convert_and_read) -> None: + output = convert_and_read( + "multiline.json", "json", "toml", multiline_threshold=3 + ) + reference = read_file("multiline-3.toml") + assert output == reference + + def test_json2toml_multiline_5(self, convert_and_read) -> None: + output = convert_and_read( + "multiline.json", "json", "toml", multiline_threshold=5 + ) + reference = read_file("multiline-5.toml") + assert output == reference + def test_json2yaml(self, convert_and_read) -> None: output = convert_and_read("example.json", "json", "yaml").decode("utf-8") reference = read_file("example.yaml").decode("utf-8")