From 8f21a53db7a2b12fc2850531339abfb955ef7537 Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Fri, 12 Jul 2024 14:43:08 +0200 Subject: [PATCH] Remove compute_encoded_size from public API. This function need not be exposed to the user interface. --- lib/codecs/codecs.ml | 17 ++--------------- lib/codecs/codecs.mli | 8 ++------ test/test_codecs.ml | 42 ++++++++++++++++++++++-------------------- 3 files changed, 26 insertions(+), 41 deletions(-) diff --git a/lib/codecs/codecs.ml b/lib/codecs/codecs.ml index c1696097..e2c95ed9 100644 --- a/lib/codecs/codecs.ml +++ b/lib/codecs/codecs.ml @@ -19,9 +19,9 @@ type bytestobytes = type arraytobytes = [ `Bytes of endianness - | `ShardingIndexed of sharding_config ] + | `ShardingIndexed of shard_config ] -and sharding_config = +and shard_config = {chunk_shape : int array ;codecs : bytestobytes shard_chain ;index_codecs : fixed_bytestobytes shard_chain @@ -95,19 +95,6 @@ module Chain = struct let default : t = {a2a = []; a2b = ArrayToBytes.default; b2b = []} - let compute_encoded_size : int -> t -> int = fun input_size t -> - List.fold_left BytesToBytes.compute_encoded_size - (ArrayToBytes.compute_encoded_size - (List.fold_left ArrayToArray.compute_encoded_size - input_size t.a2a) t.a2b) - (List.map - (function - | Any Crc32c -> Crc32c - | Any _ -> - let msg = - "Cannot compute encoded size for variable-size codecs." - in failwith msg) t.b2b) - let encode : type a b. t -> (a, b) Ndarray.t -> (string, [> error ]) result = fun t x -> diff --git a/lib/codecs/codecs.mli b/lib/codecs/codecs.mli index 848c8d3c..c4cb3166 100644 --- a/lib/codecs/codecs.mli +++ b/lib/codecs/codecs.mli @@ -37,10 +37,10 @@ type loc = Start | End (** The type of [array -> bytes] codecs. *) type arraytobytes = [ `Bytes of endianness - | `ShardingIndexed of sharding_config ] + | `ShardingIndexed of shard_config ] (** A type representing the Sharding indexed codec's configuration parameters. *) -and sharding_config = +and shard_config = {chunk_shape : int array ;codecs : bytestobytes shard_chain ;index_codecs : fixed_bytestobytes shard_chain @@ -85,10 +85,6 @@ module Chain : sig the required codecs as defined in the Zarr Version 3 specification. *) val default : t - (** [compute_encoded_size init t] returns the size (in bytes) of the - encoded byte string given the size [init] of its decoded representation. *) - val compute_encoded_size : int -> t -> int - (** [encode t x] computes the encoded byte string representation of array chunk [x]. Returns an error upon failure. *) val encode : diff --git a/test/test_codecs.ml b/test/test_codecs.ml index f2c7a655..d9e256bd 100644 --- a/test/test_codecs.ml +++ b/test/test_codecs.ml @@ -65,24 +65,6 @@ let tests = [ let c = Chain.create decoded_repr chain in assert_bool "" @@ Result.is_ok c; let c = Result.get_ok c in - assert_raises - ~msg:"Encoded size cannot be computed for compression codecs." - (Failure "Cannot compute encoded size for variable-size codecs.") - (fun () -> Chain.compute_encoded_size 0 c); - - let c' = - Result.get_ok @@ - Chain.create decoded_repr {chain with b2b = [`Crc32c]} - in - let init_size = - (Array.fold_left Int.mul 1 decoded_repr.shape) * - Bigarray.kind_size_in_bytes decoded_repr.kind - in - assert_equal - ~printer:string_of_int - (init_size + 4 + 4) @@ (* 2 crc32c codecs *) - Chain.compute_encoded_size init_size c'; - let arr = Ndarray.create decoded_repr.kind @@ -336,7 +318,10 @@ let tests = [ let cfg = {chunk_shape = [|3; 5; 5|] ;index_location = Start - ;index_codecs = {a2a = []; a2b = `Bytes Little; b2b = []} + ;index_codecs = + {a2a = [] + ;a2b = `Bytes Little + ;b2b = [`Crc32c]} ;codecs = {a2a = []; a2b = `Bytes Big; b2b = []}} in let chain = @@ -353,7 +338,7 @@ let tests = [ let chain = {a2a = [] - ;a2b = `ShardingIndexed {cfg with chunk_shape = [|5; 5; 5|]} + ;a2b = `ShardingIndexed {cfg with chunk_shape = [|5; 3; 5|]} ;b2b = []} in let c = Chain.create decoded_repr chain in @@ -380,6 +365,23 @@ let tests = [ assert_failure "Successfully encoded array should decode without fail"); + (* test if including a transpose codec for index_codec chain results in + a failure. *) + let chain' = + {chain with + a2b = `ShardingIndexed + {cfg with + chunk_shape = [|5; 3; 5|] + ;index_codecs = + {cfg.index_codecs with a2a = [`Transpose [|0; 3; 1; 2|]]}}} + in + let cc = Chain.create decoded_repr chain' |> Result.get_ok in + assert_bool + "shard index chain can't be encoded since Owl does not support transposing + Int64 types. See: + https://github.com/owlbarn/owl/issues/671#issuecomment-2211303040" @@ + Result.is_error @@ Chain.encode cc arr; + (* test correctness of decoding nested sharding codecs.*) let str = {|[