From d138ecf7bcd9cb5e3d78a6e4e75172ce1d783f94 Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Thu, 11 Jul 2024 02:11:16 +0200 Subject: [PATCH] Support optional `chunk_key_encoding` configuration. Ensures that optional chunk_key_encoding configuration is honored. If not specified, a default value is used, and this default configuration is not written to the JSON when serializing a metadata type. --- lib/extensions.ml | 44 ++++++++++++++++++++++++------------------- test/test_metadata.ml | 37 ++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 25 deletions(-) diff --git a/lib/extensions.ml b/lib/extensions.ml index 1606e44d..672b9f9f 100644 --- a/lib/extensions.ml +++ b/lib/extensions.ml @@ -71,14 +71,14 @@ end module ChunkKeyEncoding = struct type kind = Default | V2 - type t = {name : kind; sep : string} + type t = {name : kind; sep : string; is_default : bool} let create = function - | `Dot -> {name = Default; sep = "."} - | `Slash -> {name = Default; sep = "/"} + | `Dot -> {name = Default; sep = "."; is_default = false} + | `Slash -> {name = Default; sep = "/"; is_default = false} (* map a chunk coordinate index to a key. 
E.g, (2,3,1) maps to c/2/3/1 *) - let encode {name; sep} index = + let encode {name; sep; _} index = let f i acc = string_of_int i :: acc in @@ -95,31 +95,37 @@ module ChunkKeyEncoding = struct Array.fold_right f index [] let ( = ) x y = - x.name = y.name && x.sep = y.sep + x.name = y.name && x.sep = y.sep && x.is_default = y.is_default - let to_yojson {name; sep} = + let to_yojson {name; sep; is_default} = let str = match name with | Default -> "default" | V2 -> "v2" in - `Assoc - [("name", `String str) - ;("configuration", `Assoc [("separator", `String sep)])] + if is_default then + `Assoc [("name", `String str)] + else + `Assoc + [("name", `String str) + ;("configuration", `Assoc [("separator", `String sep)])] let of_yojson x = match - Util.get_name x, - Yojson.Safe.Util.(member "configuration" x |> to_assoc) + Util.get_name x, Yojson.Safe.Util.member "configuration" x with - | "default", [("separator", `String "/")] -> - Ok {name = Default; sep = "/"} - | "default", [("separator", `String ".")] -> - Ok {name = Default; sep = "."} - | "v2", [("separator", `String "/")] -> - Ok {name = V2; sep = "/"} - | "v2", [("separator", `String ".")] -> - Ok {name = V2; sep = "."} + | "default", `Null -> + Ok {name = Default; sep = "/"; is_default = true} + | "default", `Assoc [("separator", `String "/")] -> + Ok {name = Default; sep = "/"; is_default = false} + | "default", `Assoc [("separator", `String ".")] -> + Ok {name = Default; sep = "."; is_default = false} + | "v2", `Null -> + Ok {name = V2; sep = "."; is_default = true} + | "v2", `Assoc [("separator", `String "/")] -> + Ok {name = V2; sep = "/"; is_default = false} + | "v2", `Assoc [("separator", `String ".")] -> + Ok {name = V2; sep = "."; is_default = false} | _ -> Error "Invalid chunk key encoding configuration." 
end diff --git a/test/test_metadata.ml b/test/test_metadata.ml index 99abce2a..e1d6dfc8 100644 --- a/test/test_metadata.ml +++ b/test/test_metadata.ml @@ -183,8 +183,7 @@ let test_encode_decode_fill_value fv = "fill_value": %s, "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [100, 10]}}, - "chunk_key_encoding": - {"name": "default", "configuration": {"separator": "."}}, + "chunk_key_encoding": {"name": "default"}, "attributes": {"question": 7}}|} fv in let expected = Yojson.Safe.from_string str in @@ -267,8 +266,7 @@ let array = [ "zarr_format": 3, "shape": [10000, 1000], "data_type": "float64", - "chunk_key_encoding": - {"name": "v2", "configuration": {"separator": "."}}, + "chunk_key_encoding": {"name": "v2"}, "codecs": [ {"name": "bytes", "configuration": {"endian": "big"}}], "fill_value": "0x7fc00000", @@ -340,8 +338,7 @@ let array = [ "node_type": "array", "shape": [10000, 1000], "data_type": "float64", - "chunk_key_encoding": - {"name": "v2", "configuration": {"separator": "."}}, + "chunk_key_encoding": {"name": "v2"}, "codecs": {"name": "bytes", "configuration": {"endian": "big"}}, "fill_value": "0x7fc00000", @@ -469,6 +466,34 @@ let array = [ ~str:(template {|"UNKNOWN"|} {|[2, 4]|}) ~msg:"Invalid Chunk grid name or configuration."; + (* test if decoding a chunk key encoding field without a configuration + leads to a default value being used. *) + let str = {|{ + "zarr_format": 3, + "shape": [10000, 1000], + "node_type": "array", + "data_type": "float64", + "codecs": [ + {"name": "bytes", "configuration": {"endian": "big"}}], + "fill_value": "0x7fc00000", + "chunk_grid": + {"name": "regular", "configuration": {"chunk_shape": [10, 10]}}, + "chunk_key_encoding": {"name": "v2"}}|} in + (match ArrayMetadata.of_yojson @@ Yojson.Safe.from_string str with + | Ok meta -> + (* we expect it to use the default "." separator. 
*) + assert_equal + ~printer:Fun.id "2.0.1" @@ ArrayMetadata.chunk_key meta [|2; 0; 1|]; + (* we expect the default (unspecified) config separator to be + dropped when serializing the metadata to JSON format. *) + assert_equal + ~printer:Fun.id + Yojson.Safe.(from_string str |> to_string) @@ + ArrayMetadata.encode meta; + | Error _ -> + assert_failure + "Decoding a well formed array JSON metadata should not fail."); + (* test if the decoding fails if chunk key encoding contains unknown * separator or name. *) let str = {|{