From 88574fd834d7aa7706b424b98207e153a449a54b Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Sat, 13 Jul 2024 21:50:21 +0200 Subject: [PATCH] Apply small optimizations to select storage functions. --- lib/codecs/array_to_bytes.ml | 27 ++++++++++---------------- lib/storage/storage.ml | 37 +++++++++++++++--------------------- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/lib/codecs/array_to_bytes.ml b/lib/codecs/array_to_bytes.ml index ca2667e5..73c95631 100644 --- a/lib/codecs/array_to_bytes.ml +++ b/lib/codecs/array_to_bytes.ml @@ -267,18 +267,17 @@ end = struct ;shape = t.chunk_shape} in let cindices = ArraySet.of_seq @@ Arraytbl.to_seq_keys tbl in let buf = Buffer.create @@ Ndarray.size_in_bytes x in - let offset = ref 0L in let coord = idx_shp in ArraySet.fold (fun idx acc -> - acc >>= fun () -> + acc >>= fun offset -> (* find_all returns bindings in reverse order. To restore the * C-ordering of elements we must call List.rev. *) let vals = + Array.of_list @@ + snd @@ + List.split @@ + List.rev @@ Arraytbl.find_all tbl idx - |> List.rev - |> List.split - |> snd - |> Array.of_list in let x' = Ndarray.of_array repr.kind vals t.chunk_shape in encode_chain t.codecs x' >>| fun b -> @@ -286,24 +285,18 @@ end = struct let len = Array.length idx in Array.blit idx 0 coord 0 len; coord.(len) <- 0; - Ndarray.set shard_idx coord !offset; + Ndarray.set shard_idx coord offset; coord.(len) <- 1; let nbytes = Int64.of_int @@ String.length b in Ndarray.set shard_idx coord nbytes; - offset := Int64.add !offset nbytes) cindices (Ok ()) - >>= fun () -> + Int64.add offset nbytes) cindices (Ok 0L) + >>= fun _ -> (* convert t.index_codecs to a generic bytes-to-bytes chain. *) encode_chain (t.index_codecs :> bytestobytes internal_chain) shard_idx >>| fun b' -> match t.index_location with - | Start -> - let buf' = Buffer.create @@ String.length b' in - Buffer.add_string buf' b'; - Buffer.add_buffer buf' buf; - Buffer.contents buf' - | End -> - Buffer.add_string buf b'; - Buffer.contents buf + | Start -> b' ^ Buffer.contents buf + | End -> Buffer.(add_string buf b'; contents buf) let rec decode_chain : type a b. diff --git a/lib/storage/storage.ml b/lib/storage/storage.ml index e6274e0f..67e3a938 100644 --- a/lib/storage/storage.ml +++ b/lib/storage/storage.ml @@ -80,32 +80,27 @@ module Make (M : STORE) : S with type t = M.t = struct let find_child_nodes t node = List.fold_left - (fun (lacc, racc) pre -> + (fun (l, r) pre -> let p = "/" ^ String.(length pre - 1 |> sub pre 0) in if unsafe_node_type t (pre ^ "zarr.json") = "array" then - let x = Result.get_ok @@ ArrayNode.of_path p in - x :: lacc, racc + (Result.get_ok @@ ArrayNode.of_path p) :: l, r else - let x = Result.get_ok @@ GroupNode.of_path p in - lacc, x :: racc) + l, (Result.get_ok @@ GroupNode.of_path p) :: r) ([], []) (snd @@ list_dir t @@ GroupNode.to_prefix node) let find_all_nodes t = - let keys = - List.filter - (String.ends_with ~suffix:"/zarr.json") - (list_prefix "" t) in - let a, g = + match List.fold_left - (fun (lacc, racc) key -> - let p = "/" ^ String.(length key - 10 |> sub key 0) in - if unsafe_node_type t key = "array" then - (Result.get_ok @@ ArrayNode.of_path p) :: lacc, racc - else - lacc, (Result.get_ok @@ GroupNode.of_path p) :: racc) - ([], []) keys in - match a, g with - | [], [] -> a, g + (fun ((l, r) as acc) key -> + if String.ends_with ~suffix:"/zarr.json" key then + let p = "/" ^ String.(length key - 10 |> sub key 0) in + if unsafe_node_type t key = "array" then + (Result.get_ok @@ ArrayNode.of_path p) :: l, r + else + l, (Result.get_ok @@ GroupNode.of_path p) :: r + else acc) ([], []) (list_prefix "" t) + with + | [], [] as xs -> xs | l, r -> l, GroupNode.root :: r let erase_group_node t node = @@ -114,9 +109,7 @@ module Make (M : STORE) : S with type t = M.t = struct let erase_array_node t node = erase t @@ ArrayNode.to_metakey node - let erase_all_nodes t = - (* [erase_prefix t ""] is surely faster? *) - erase_values t @@ list_prefix "" t + let erase_all_nodes t = erase_prefix t "" let set_array : type a b.