diff --git a/lib/storage/base.ml b/lib/storage/base.ml deleted file mode 100644 index 07cc6b3a..00000000 --- a/lib/storage/base.ml +++ /dev/null @@ -1,93 +0,0 @@ -open Interface - -(* general implementation agnostic STORE interface functions *) - -module StrSet = Set.Make (String) - -let erase_values ~erase_fn t keys = - StrSet.iter (erase_fn t) @@ StrSet.of_list keys - -let erase_prefix ~list_fn ~erase_fn t pre = - List.iter (fun k -> - if String.starts_with ~prefix:pre k - then begin - erase_fn t k - end) @@ list_fn t - -let list_prefix ~list_fn t pre = - List.filter - (String.starts_with ~prefix:pre) - (list_fn t) - -let list_dir ~list_fn t pre = - let paths = - List.map - (fun k -> - Result.get_ok @@ - Node.of_path @@ - String.cat "/" k) - (list_prefix ~list_fn t pre) - in - let is_prefix_child k = - match Node.parent k with - | Some par -> - String.equal pre @@ Node.to_prefix par - | None -> false in - let keys, rest = - List.partition_map (fun k -> - match is_prefix_child k with - | true -> Either.left @@ Node.to_key k - | false -> Either.right k) - paths - in - let prefixes = - List.fold_left (fun acc k -> - match - List.find_opt - is_prefix_child - (Node.ancestors k) - with - | None -> acc - | Some v -> - let w = Node.to_prefix v in - if List.mem w acc then acc - else w :: acc) - [] rest - in - keys, prefixes - -let rec get_partial_values ~get_fn t kr_pairs = - match kr_pairs with - | [] -> [None] - | (k, r) :: xs -> - match get_fn t k with - | Error _ -> - None :: (get_partial_values ~get_fn t xs) - | Ok v -> - try - let sub = match r with - | ByteRange (rs, None) -> - String.sub v rs @@ String.length v - | ByteRange (rs, Some rl) -> - String.sub v rs rl in - Some sub :: (get_partial_values ~get_fn t xs) - with - | Invalid_argument _ -> - None :: (get_partial_values ~get_fn t xs) - -let rec set_partial_values ~set_fn ~get_fn t = function - | [] -> Ok () - | (k, rs, v) :: xs -> - match get_fn t k with - | Error _ -> - set_fn t k v; - set_partial_values ~set_fn ~get_fn t xs - | Ok ov -> - try - let ov' = Bytes.of_string ov in - String.(length v |> blit v 0 ov' rs); - set_fn t k @@ Bytes.to_string ov'; - set_partial_values ~set_fn ~get_fn t xs - with - | Invalid_argument s -> - Error (`Invalid_byte_range s) diff --git a/lib/storage/filesystem.ml b/lib/storage/filesystem.ml index 525b4527..823b6d72 100644 --- a/lib/storage/filesystem.ml +++ b/lib/storage/filesystem.ml @@ -39,7 +39,7 @@ module Impl = struct (fun oc -> Out_channel.output_string oc value) let list t = - let module StrSet = Base.StrSet in + let module StrSet = Storage_intf.Base.StrSet in let rec aux acc path = try match Sys.readdir path with @@ -70,22 +70,26 @@ module Impl = struct | Sys_error _ -> () let get_partial_values t kr_pairs = - Base.get_partial_values ~get_fn:get t kr_pairs + Storage_intf.Base.get_partial_values + ~get_fn:get t kr_pairs let set_partial_values t krv_triplet = - Base.set_partial_values ~set_fn:set ~get_fn:get t krv_triplet + Storage_intf.Base.set_partial_values + ~set_fn:set ~get_fn:get t krv_triplet let erase_values t keys = - Base.erase_values ~erase_fn:erase t keys + Storage_intf.Base.erase_values + ~erase_fn:erase t keys let erase_prefix t pre = - Base.erase_prefix ~list_fn:list ~erase_fn:erase t pre + Storage_intf.Base.erase_prefix + ~list_fn:list ~erase_fn:erase t pre let list_prefix pre t = - Base.list_prefix ~list_fn:list t pre + Storage_intf.Base.list_prefix ~list_fn:list t pre let list_dir t pre = - Base.list_dir ~list_fn:list t pre + Storage_intf.Base.list_dir ~list_fn:list t pre end let create ?(file_perm=0o640) path = diff --git a/lib/storage/interface.ml b/lib/storage/interface.ml deleted file mode 100644 index 93216d78..00000000 --- a/lib/storage/interface.ml +++ /dev/null @@ -1,349 +0,0 @@ -open Metadata -open Util.Result_syntax - -type key = string -type range = ByteRange of int * int option - -type error = - [ `Store_read of string - | `Invalid_slice of string - | `Invalid_kind of string - | `Reshape_error of string - | `Invalid_byte_range of string - | Codecs.error - | Metadata.error ] - -module type STORE = sig - type t - val get : t -> key -> (string, [> error]) result - val get_partial_values : t -> (key * range) list -> string option list - val set : t -> key -> string -> unit - val set_partial_values : t -> (key * int * string) list -> (unit, [> error]) result - val erase : t -> key -> unit - val erase_values : t -> key list -> unit - val erase_prefix : t -> key -> unit - val list : t -> key list - val list_prefix : key -> t -> key list - val list_dir : t -> key -> key list * string list - val is_member : t -> key -> bool -end - -module Ndarray = Owl.Dense.Ndarray.Generic - -module type S = sig - type t - - val create_group - : ?metadata:GroupMetadata.t -> t -> Node.t -> unit - - val create_array - : ?sep:Extensions.separator -> - ?dimension_names:string option list -> - ?attributes:Yojson.Safe.t -> - ?codecs:Codecs.chain -> - shape:int array -> - chunks:int array -> - ('a, 'b) Bigarray.kind -> - 'a -> - Node.t -> - t -> - (unit, [> error]) result - - val array_metadata - : Node.t -> t -> (ArrayMetadata.t, [> error]) result - - val group_metadata - : Node.t -> t -> (GroupMetadata.t, [> error]) result - - val find_child_nodes - : t -> Node.t -> (Node.t list * Node.t list, string) result - - val find_all_nodes : t -> Node.t list - - val erase_node : t -> Node.t -> unit - - val is_member : t -> Node.t -> bool - - val set_array - : Node.t -> - Owl_types.index array -> - ('a, 'b) Ndarray.t -> - t -> - (unit, [> error]) result - - val get_array - : Node.t -> - Owl_types.index array -> - ('a, 'b) Bigarray.kind -> - t -> - (('a, 'b) Ndarray.t, [> error]) result - - val reshape - : t -> Node.t -> int array -> (unit, [> error]) result -end - -module Make (M : STORE) : S with type t = M.t = struct - module ArraySet = Util.ArraySet - module Arraytbl = Util.Arraytbl - module AM = ArrayMetadata - module GM = GroupMetadata - include M - - (* All nodes are explicit upon creation so just check the node's metadata key.*) - let is_member t node = - M.is_member t @@ Node.to_metakey node - - let rec create_group ?metadata t node = - if is_member t node then () - else - (match metadata, Node.to_metakey node with - | Some m, k -> set t k @@ GM.encode m; - | None, k -> set t k @@ GM.(default |> encode)); - make_implicit_groups_explicit t @@ Node.parent node - - and make_implicit_groups_explicit t = function - | None -> () - | Some n -> create_group t n - - let create_array - ?(sep=Extensions.Slash) - ?(dimension_names=[]) - ?(attributes=`Null) - ?codecs - ~shape - ~chunks - kind - fill_value - node - t - = - let open Util in - let repr = {kind; fill_value; shape = chunks} in - (match codecs with - | Some c -> Codecs.Chain.create repr c - | None -> Ok Codecs.Chain.default) - >>= fun codecs -> - let meta = - AM.create - ~sep - ~codecs - ~dimension_names - ~attributes - ~shape - kind - fill_value - chunks - in - set t (Node.to_metakey node) (AM.encode meta); - Ok (make_implicit_groups_explicit t @@ Node.parent node) - - (* Assumes without checking that [metakey] is a valid node metadata key.*) - let unsafe_node_type t metakey = - let open Yojson.Safe in - get t metakey |> Result.get_ok |> from_string - |> Util.member "node_type" |> Util.to_string - - let get_metadata node t = - match is_member t node, Node.to_metakey node with - | true, k when unsafe_node_type t k = "array" -> - get t k >>= fun bytes -> - AM.decode bytes >>= fun meta -> - Ok (Either.left meta) - | true, k -> - get t k >>= fun bytes -> - GM.decode bytes >>= fun meta -> - Ok (Either.right meta) - | false, _ -> - Result.error @@ - `Store_read (Node.show node ^ " is not a store member.") - - let group_metadata node t = - match get_metadata node t with - | Ok x when Either.is_right x -> - Ok (Either.find_right x |> Option.get) - | Ok _ -> - Result.error @@ - `Store_read (Node.show node ^ " is not a group node.") - | Error _ as err -> err - - let array_metadata node t = - match get_metadata node t with - | Ok x when Either.is_left x -> - Ok (Either.find_left x |> Option.get) - | Ok _ -> - Result.error @@ - `Store_read (Node.show node ^ " is not an array node.") - | Error _ as err -> err - - let find_child_nodes t node = - match is_member t node, Node.to_metakey node with - | true, k when unsafe_node_type t k = "group" -> - Result.ok @@ - List.fold_left - (fun (lacc, racc) pre -> - let x = - Result.get_ok @@ (* this operation should not fail *) - Node.of_path @@ - "/" ^ String.(length pre - 1 |> sub pre 0) - in - if unsafe_node_type t (pre ^ "zarr.json") = "array" then - x :: lacc, racc - else - lacc, x :: racc) - ([], []) (snd @@ list_dir t @@ Node.to_prefix node) - | true, _ -> - Error (Node.show node ^ " is not a group node.") - | false, _ -> - Error (Node.show node ^ " is not a node in this heirarchy.") - - let find_all_nodes t = - let rec aux acc p = - match find_child_nodes t p with - | Error _ -> acc - | Ok ([], []) -> p :: acc - | Ok (arrays, groups) -> - arrays @ p :: List.concat_map (aux acc) groups - in aux [] Node.root - - let erase_node t node = - erase_prefix t @@ Node.to_prefix node - - let set_array - : type a b. - Node.t -> - Owl_types.index array -> - (a, b) Ndarray.t -> - t -> - (unit, [> error]) result - = fun node slice x t -> - let open Util in - get t @@ Node.to_metakey node >>= fun bytes -> - AM.decode bytes >>= fun meta -> - (if Ndarray.shape x = Indexing.slice_shape slice @@ AM.shape meta then - Ok () - else - Error (`Invalid_slice "slice and input array shapes are unequal.")) - >>= fun () -> - (if AM.is_valid_kind meta @@ Ndarray.kind x then - Ok () - else - Result.error @@ - `Invalid_kind ( - "input array's kind is not compatible with node's data type.")) - >>= fun () -> - let coords = Indexing.coords_of_slice slice @@ AM.shape meta in - let tbl = Arraytbl.create @@ Array.length coords - in - Ndarray.iteri (fun i y -> - let k, c = AM.index_coord_pair meta coords.(i) in - Arraytbl.add tbl k (c, y)) x; - let repr = - {kind = Ndarray.kind x - ;shape = AM.chunk_shape meta - ;fill_value = AM.fillvalue_of_kind meta @@ Ndarray.kind x} - in - let codecs = AM.codecs meta in - let prefix = Node.to_prefix node in - let cindices = ArraySet.of_seq @@ Arraytbl.to_seq_keys tbl in - ArraySet.fold (fun idx acc -> - acc >>= fun () -> - let chunkkey = prefix ^ AM.chunk_key meta idx in - (match get t chunkkey with - | Ok b -> - Codecs.Chain.decode codecs repr b - | Error _ -> - Ok (Ndarray.create repr.kind repr.shape repr.fill_value)) - >>= fun arr -> - (* find_all returns bindings in reverse order. To restore the - * C-ordering of elements we must call List.rev. *) - let coords, vals = - List.split @@ - List.rev @@ - Arraytbl.find_all tbl idx in - let slice' = Indexing.slice_of_coords coords in - let shape' = Indexing.slice_shape slice' repr.shape in - let x' = Ndarray.of_array repr.kind (Array.of_list vals) shape' in - (* Ndarray.set_fancy* unfortunately doesn't work for array kinds - other than Float32, Float64, Complex32 and Complex64. - See: https://github.com/owlbarn/owl/issues/671 *) - Ndarray.set_fancy_ext slice' arr x'; (* possible to rewrite this function? *) - Codecs.Chain.encode codecs arr >>| fun encoded -> - set t chunkkey encoded) cindices (Ok ()) - - let get_array - : type a b. - Node.t -> - Owl_types.index array -> - (a, b) Bigarray.kind -> - t -> - ((a, b) Ndarray.t, [> error]) result - = fun node slice kind t -> - let open Util in - get t @@ Node.to_metakey node >>= fun bytes -> - AM.decode bytes >>= fun meta -> - (if AM.is_valid_kind meta kind then - Ok () - else - Result.error @@ - `Invalid_kind ("input kind is not compatible with node's data type.")) - >>= fun () -> - (try - Ok (Indexing.slice_shape slice @@ AM.shape meta) - with - | Assert_failure _ -> - Result.error @@ - `Store_read "slice shape is not compatible with node's shape.") - >>= fun sshape -> - let pair = - Array.map - (AM.index_coord_pair meta) - (Indexing.coords_of_slice slice @@ AM.shape meta) in - let tbl = Arraytbl.create @@ Array.length pair in - let prefix = Node.to_prefix node in - let chain = AM.codecs meta in - let repr = - {kind - ;shape = AM.chunk_shape meta - ;fill_value = AM.fillvalue_of_kind meta kind} - in - Array.fold_right (fun (idx, coord) acc -> - acc >>= fun l -> - match Arraytbl.find_opt tbl idx with - | Some arr -> - Ok (Ndarray.get arr coord :: l) - | None -> - (match get t @@ prefix ^ AM.chunk_key meta idx with - | Ok b -> - Codecs.Chain.decode chain repr b - | Error _ -> - Ok (Ndarray.create repr.kind repr.shape repr.fill_value)) - >>= fun arr -> - Arraytbl.add tbl idx arr; - Ok (Ndarray.get arr coord :: l)) pair (Ok []) - >>| fun res -> - Ndarray.of_array kind (Array.of_list res) sshape - - let reshape t node shape = - let mkey = Node.to_metakey node in - (if "array" = unsafe_node_type t mkey then - Ok () - else - Error (`Reshape_error (Node.show node ^ " is not an array node."))) - >>= fun () -> - get t mkey >>= fun bytes -> - AM.decode bytes >>= fun meta -> - (if Array.length shape = Array.length @@ AM.shape meta then - Ok () - else - Error (`Reshape_error "new shape must have same number of dimensions.")) - >>= fun () -> - let pre = Node.to_prefix node in - let s = - ArraySet.of_list @@ AM.chunk_indices meta @@ AM.shape meta in - let s' = - ArraySet.of_list @@ AM.chunk_indices meta shape in - ArraySet.iter - (fun v -> erase t @@ pre ^ AM.chunk_key meta v) - ArraySet.(diff s s'); - Ok (set t mkey @@ AM.encode @@ AM.update_shape meta shape) -end diff --git a/lib/storage/memory.ml b/lib/storage/memory.ml index 7578eb4a..d68ad938 100644 --- a/lib/storage/memory.ml +++ b/lib/storage/memory.ml @@ -6,6 +6,8 @@ end module StrMap = Hashtbl.Make (HashableString) +let create () = StrMap.create 16 + module Impl = struct type t = string StrMap.t @@ -32,19 +34,19 @@ module Impl = struct Some v) t let get_partial_values t kr_pairs = - Base.get_partial_values ~get_fn:get t kr_pairs + Storage_intf.Base.get_partial_values + ~get_fn:get t kr_pairs let set_partial_values t krv_triplet = - Base.set_partial_values ~set_fn:set ~get_fn:get t krv_triplet + Storage_intf.Base.set_partial_values + ~set_fn:set ~get_fn:get t krv_triplet let erase_values t keys = - Base.erase_values ~erase_fn:erase t keys + Storage_intf.Base.erase_values ~erase_fn:erase t keys let list_prefix pre t = - Base.list_prefix ~list_fn:list t pre + Storage_intf.Base.list_prefix ~list_fn:list t pre let list_dir t pre = - Base.list_dir ~list_fn:list t pre + Storage_intf.Base.list_dir ~list_fn:list t pre end - -let create () = StrMap.create 16 diff --git a/lib/storage/storage.ml b/lib/storage/storage.ml index 19106877..78a4cac5 100644 --- a/lib/storage/storage.ml +++ b/lib/storage/storage.ml @@ -1,15 +1,276 @@ -type error = Interface.error +include Storage_intf -module type S = Interface.S +open Util.Result_syntax + +module Make (M : STORE) : S with type t = M.t = struct + module Ndarray = Owl.Dense.Ndarray.Generic + module ArraySet = Util.ArraySet + module Arraytbl = Util.Arraytbl + module AM = Metadata.ArrayMetadata + module GM = Metadata.GroupMetadata + include M + + (* All nodes are explicit upon creation so just check the node's metadata key.*) + let is_member t node = + M.is_member t @@ Node.to_metakey node + + let rec create_group ?metadata t node = + if is_member t node then () + else + (match metadata, Node.to_metakey node with + | Some m, k -> set t k @@ GM.encode m; + | None, k -> set t k @@ GM.(default |> encode)); + make_implicit_groups_explicit t @@ Node.parent node + + and make_implicit_groups_explicit t = function + | None -> () + | Some n -> create_group t n + + let create_array + ?(sep=Extensions.Slash) + ?(dimension_names=[]) + ?(attributes=`Null) + ?codecs + ~shape + ~chunks + kind + fill_value + node + t + = + let open Util in + let repr = {kind; fill_value; shape = chunks} in + (match codecs with + | Some c -> Codecs.Chain.create repr c + | None -> Ok Codecs.Chain.default) + >>= fun codecs -> + let meta = + AM.create + ~sep + ~codecs + ~dimension_names + ~attributes + ~shape + kind + fill_value + chunks + in + set t (Node.to_metakey node) (AM.encode meta); + Ok (make_implicit_groups_explicit t @@ Node.parent node) + + (* Assumes without checking that [metakey] is a valid node metadata key.*) + let unsafe_node_type t metakey = + let open Yojson.Safe in + get t metakey |> Result.get_ok |> from_string + |> Util.member "node_type" |> Util.to_string + + let group_metadata node t = + if not @@ is_member t node then + Result.error @@ + `Store_read (Node.show node ^ " is not a member of this store.") + else + get t @@ Node.to_metakey node >>= fun bytes -> + match GM.decode bytes with + | Ok meta -> Ok meta + | Error _ -> + Result.error @@ + `Store_read (Node.show node ^ " is not an array node.") + + let array_metadata node t = + if not @@ is_member t node then + Result.error @@ + `Store_read (Node.show node ^ " is not a member of this store.") + else + get t @@ Node.to_metakey node >>= fun bytes -> + match AM.decode bytes with + | Ok meta -> Ok meta + | Error _ -> + Result.error @@ + `Store_read (Node.show node ^ " is not an array node.") + + let find_child_nodes t node = + match is_member t node, Node.to_metakey node with + | true, k when unsafe_node_type t k = "group" -> + Result.ok @@ + List.fold_left + (fun (lacc, racc) pre -> + let x = + Result.get_ok @@ (* this operation should not fail *) + Node.of_path @@ + "/" ^ String.(length pre - 1 |> sub pre 0) + in + if unsafe_node_type t (pre ^ "zarr.json") = "array" then + x :: lacc, racc + else + lacc, x :: racc) + ([], []) (snd @@ list_dir t @@ Node.to_prefix node) + | true, _ -> + Result.error @@ + `Store_read (Node.show node ^ " is not a group node.") + | false, _ -> + Result.error @@ + `Store_read (Node.show node ^ " is not a node in this heirarchy.") + + let find_all_nodes t = + let rec aux acc p = + match find_child_nodes t p with + | Error _ -> acc + | Ok ([], []) -> p :: acc + | Ok (arrays, groups) -> + arrays @ p :: List.concat_map (aux acc) groups + in aux [] Node.root + + let erase_node t node = + erase_prefix t @@ Node.to_prefix node + + let set_array + : type a b. + Node.t -> + Owl_types.index array -> + (a, b) Ndarray.t -> + t -> + (unit, [> error]) result + = fun node slice x t -> + let open Util in + get t @@ Node.to_metakey node >>= fun bytes -> + AM.decode bytes >>= fun meta -> + (if Ndarray.shape x = Indexing.slice_shape slice @@ AM.shape meta then + Ok () + else + Error (`Store_write "slice and input array shapes are unequal.")) + >>= fun () -> + (if AM.is_valid_kind meta @@ Ndarray.kind x then + Ok () + else + Result.error @@ + `Store_write ( + "input array's kind is not compatible with node's data type.")) + >>= fun () -> + let coords = Indexing.coords_of_slice slice @@ AM.shape meta in + let tbl = Arraytbl.create @@ Array.length coords + in + Ndarray.iteri (fun i y -> + let k, c = AM.index_coord_pair meta coords.(i) in + Arraytbl.add tbl k (c, y)) x; + let repr = + {kind = Ndarray.kind x + ;shape = AM.chunk_shape meta + ;fill_value = AM.fillvalue_of_kind meta @@ Ndarray.kind x} + in + let codecs = AM.codecs meta in + let prefix = Node.to_prefix node in + let cindices = ArraySet.of_seq @@ Arraytbl.to_seq_keys tbl in + ArraySet.fold (fun idx acc -> + acc >>= fun () -> + let chunkkey = prefix ^ AM.chunk_key meta idx in + (match get t chunkkey with + | Ok b -> + Codecs.Chain.decode codecs repr b + | Error _ -> + Ok (Ndarray.create repr.kind repr.shape repr.fill_value)) + >>= fun arr -> + (* find_all returns bindings in reverse order. To restore the + * C-ordering of elements we must call List.rev. *) + let coords, vals = + List.split @@ + List.rev @@ + Arraytbl.find_all tbl idx in + let slice' = Indexing.slice_of_coords coords in + let shape' = Indexing.slice_shape slice' repr.shape in + let x' = Ndarray.of_array repr.kind (Array.of_list vals) shape' in + (* Ndarray.set_fancy* unfortunately doesn't work for array kinds + other than Float32, Float64, Complex32 and Complex64. + See: https://github.com/owlbarn/owl/issues/671 *) + Ndarray.set_fancy_ext slice' arr x'; (* possible to rewrite this function? *) + Codecs.Chain.encode codecs arr >>| fun encoded -> + set t chunkkey encoded) cindices (Ok ()) + + let get_array + : type a b. + Node.t -> + Owl_types.index array -> + (a, b) Bigarray.kind -> + t -> + ((a, b) Ndarray.t, [> error]) result + = fun node slice kind t -> + let open Util in + get t @@ Node.to_metakey node >>= fun bytes -> + AM.decode bytes >>= fun meta -> + (if AM.is_valid_kind meta kind then + Ok () + else + Result.error @@ + `Store_read ("input kind is not compatible with node's data type.")) + >>= fun () -> + (try + Ok (Indexing.slice_shape slice @@ AM.shape meta) + with + | Assert_failure _ -> + Result.error @@ + `Store_read "slice shape is not compatible with node's shape.") + >>= fun sshape -> + let pair = + Array.map + (AM.index_coord_pair meta) + (Indexing.coords_of_slice slice @@ AM.shape meta) in + let tbl = Arraytbl.create @@ Array.length pair in + let prefix = Node.to_prefix node in + let chain = AM.codecs meta in + let repr = + {kind + ;shape = AM.chunk_shape meta + ;fill_value = AM.fillvalue_of_kind meta kind} + in + Array.fold_right (fun (idx, coord) acc -> + acc >>= fun l -> + match Arraytbl.find_opt tbl idx with + | Some arr -> + Ok (Ndarray.get arr coord :: l) + | None -> + (match get t @@ prefix ^ AM.chunk_key meta idx with + | Ok b -> + Codecs.Chain.decode chain repr b + | Error _ -> + Ok (Ndarray.create repr.kind repr.shape repr.fill_value)) + >>= fun arr -> + Arraytbl.add tbl idx arr; + Ok (Ndarray.get arr coord :: l)) pair (Ok []) + >>| fun res -> + Ndarray.of_array kind (Array.of_list res) sshape + + let reshape t node shape = + let mkey = Node.to_metakey node in + (if "array" = unsafe_node_type t mkey then + Ok () + else + Error (`Store_write (Node.show node ^ " is not an array node."))) + >>= fun () -> + get t mkey >>= fun bytes -> + AM.decode bytes >>= fun meta -> + (if Array.length shape = Array.length @@ AM.shape meta then + Ok () + else + Error (`Store_write "new shape must have same number of dimensions.")) + >>= fun () -> + let pre = Node.to_prefix node in + let s = + ArraySet.of_list @@ AM.chunk_indices meta @@ AM.shape meta in + let s' = + ArraySet.of_list @@ AM.chunk_indices meta shape in + ArraySet.iter + (fun v -> erase t @@ pre ^ AM.chunk_key meta v) + ArraySet.(diff s s'); + Ok (set t mkey @@ AM.encode @@ AM.update_shape meta shape) +end module MemoryStore = struct - module MS = Interface.Make (Memory.Impl) + module MS = Make (Memory.Impl) let create = Memory.create include MS end module FilesystemStore = struct - module FS = Interface.Make (Filesystem.Impl) + module FS = Make (Filesystem.Impl) let create = Filesystem.create let open_store = Filesystem.open_store let open_or_create = Filesystem.open_or_create diff --git a/lib/storage/storage.mli b/lib/storage/storage.mli index 2edfbca2..723068d0 100644 --- a/lib/storage/storage.mli +++ b/lib/storage/storage.mli @@ -1,18 +1,32 @@ -type error = Interface.error - -module type S = Interface.S +include Storage_intf.Interface module MemoryStore : sig + (** An in-memory storage backend for Zarr V3 hierarchy. *) + include S + val create : unit -> t + (** [create ()] returns a new In-memory Zarr V3 store. *) end module FilesystemStore : sig + (** A local filesystem storage backend for a Zarr V3 hierarchy. *) + include S + val create : ?file_perm:Unix.file_perm -> string -> t + (** [create ~file_perm path] returns a new filesystem Zarr V3 store. This + * operatioin fails if [path] is a directory that already exists. *) + val open_store - : ?file_perm:Unix.file_perm -> string -> (t, [> error]) result + : ?file_perm:Unix.file_perm -> string -> (t, error) result + (** [open_store ~file_perm path] returns an existing filesystem Zarr V3 store. + * This operatioin fails if [path] is not a Zarr store path. *) + val open_or_create - : ?file_perm:Unix.file_perm -> string -> (t, [> error]) result + : ?file_perm:Unix.file_perm -> string -> (t, error) result + (** [open_or_create ~file_perm path] returns an existing filesystem store + * and creates it if it does not exist at path [path]. See documentation + * for {!open_store} and {!create} for more information. *) end diff --git a/lib/storage/storage_intf.ml b/lib/storage/storage_intf.ml new file mode 100644 index 00000000..8b2eccdd --- /dev/null +++ b/lib/storage/storage_intf.ml @@ -0,0 +1,268 @@ +open Metadata + +type key = string + +type range = ByteRange of int * int option + +type error = + [ `Store_read of string + | `Store_write of string + | Metadata.error + | Codecs.error ] + +module type STORE = sig + (** The abstract STORE interface that stores should implement. + + The store interface defines a set of operations involving keys and values. + In the context of this interface, a key is a Unicode string, where the final + character is not a / character. In general, a value is a sequence of bytes. + Specific stores may choose more specific storage formats, which must be + stated in the specification of the respective store. + + It is assumed that the store holds (key, value) pairs, with only one + such pair for any given key. I.e., a store is a mapping from keys to + values. It is also assumed that keys are case sensitive, i.e., the keys + “foo” and “FOO” are different. The store interface also defines some + operations involving prefixes. In the context of this interface, + a prefix is a string containing only characters that are valid for use + in keys and ending with a trailing / character. *) + + type t + val get : t -> key -> (string, [> error]) result + val get_partial_values : t -> (key * range) list -> string option list + val set : t -> key -> string -> unit + val set_partial_values : t -> (key * int * string) list -> (unit, [> error]) result + val erase : t -> key -> unit + val erase_values : t -> key list -> unit + val erase_prefix : t -> key -> unit + val list : t -> key list + val list_prefix : key -> t -> key list + val list_dir : t -> key -> key list * string list + val is_member : t -> key -> bool +end + +module Ndarray = Owl.Dense.Ndarray.Generic + +module type S = sig + type t + (** The storage type. *) + + val create_group + : ?metadata:GroupMetadata.t -> t -> Node.t -> unit + (** [create_group ~meta t node] creates a group node in store [t] + containing metadata [meta]. This is a no-op if a node [node] + is already a member of this store. *) + + val create_array + : ?sep:Extensions.separator -> + ?dimension_names:string option list -> + ?attributes:Yojson.Safe.t -> + ?codecs:Codecs.chain -> + shape:int array -> + chunks:int array -> + ('a, 'b) Bigarray.kind -> + 'a -> + Node.t -> + t -> + (unit, [> Codecs.error]) result + (** [create_array ~sep ~dimension_names ~attributes ~codecs ~shape ~chunks kind fill node t] + creates an array node in store [t] where: + - Separator [sep] is used in the array's chunk key encoding. + - Dimension names [dimension_names] and user attributes [attributes] + are included in it's metadata document. + - A codec chain defined by [codecs]. + - The array has shape [shape] and chunk shape [chunks]. + - The array has data kind [kind] and fill value [fv]. + + This operation can fail if the codec chain is not well defined. *) + + val array_metadata + : Node.t -> t -> (ArrayMetadata.t, [> error]) result + (** [array_metadata node t] returns the metadata of array node [node]. + This operation returns an error if: + - The node is not a member of store [t]. + - if node [node] is a group node. *) + + val group_metadata + : Node.t -> t -> (GroupMetadata.t, [> error]) result + (** [group_metadata node t] returns the metadata of group node [node]. + This operation returns an error if: + - The node is not a member of store [t]. + - if node [node] is an array node. *) + + val find_child_nodes + : t -> Node.t -> (Node.t list * Node.t list, [> error]) result + (** [find_child_nodes t n] returns a tuple of child nodes of group node [n]. + The first element of the tuple is a list of array child nodes, and the + second element a list of child group nodes. + This operation can fail if: + - Node [n] is not a member of store [t]. + - Node [n] is an array node of store [t]. *) + + val find_all_nodes : t -> Node.t list + (** [find_all_nodes t] returns a list of all nodes in store [t]. If the + store has no nodes, an empty list is returned. *) + + val erase_node : t -> Node.t -> unit + (** [erase_node t n] erases node [n] from store [t]. This function erases + all child nodes if [n] is a group node. If node [n] is not a member + of store [t] then this is a no-op. *) + + val is_member : t -> Node.t -> bool + (** [is_member t n] returns [true] if node [n] is a member of store [t] + and [false] otherwise. *) + + val set_array + : Node.t -> + Owl_types.index array -> + ('a, 'b) Ndarray.t -> + t -> + (unit, [> error]) result + (** [set_array n s x t] writes n-dimensional array [x] to the slice [s] + of array node [n] in store [t]. This operation fails if: + - the ndarray [x] size does not equal slice [s]. + - the kind of [x] is not compatible with node [n]'s data type as + described in its metadata document. + - If there is a problem decoding/encoding node [n] chunks.*) + + val get_array + : Node.t -> + Owl_types.index array -> + ('a, 'b) Bigarray.kind -> + t -> + (('a, 'b) Ndarray.t, [> error]) result + (** [get_array n s k t] reads an n-dimensional array of size determined + by slice [s] from array node [n]. This operation fails if: + - If there is a problem decoding/encoding node [n] chunks. + - kind [k] is not compatible with node [n]'s data type as described + in its metadata document. + - The slice [s] is not a valid slice of array node [n].*) + + val reshape : t -> Node.t -> int array -> (unit, [> error]) result + (** [reshape t n shape] resizes array node [n] of store [t] into new + size [shape]. If this operation fails, an error is returned. It + can fail if: + - Node [n] is not a valid array node. + - If [shape] does not have the same dimensions as node [n]'s shape. *) +end + +module type MAKER = functor (M : STORE) -> S with type t = M.t + +module type Interface = sig + (** A Zarr store is a system that can be used to store and retrieve data + * from a Zarr hierarchy. For a store to be compatible with this + * specification, it must support a set of operations defined in the + * Abstract store interface {!STORE}. The store interface can be + * implemented using a variety of underlying storage technologies. *) + + type error + (** The error type of supported storage backends. *) + + module type S = S + (** The public interface of all supported stores. *) + + module type STORE = STORE + (** The module interface that all supported stores must implement. *) + + module type MAKER = MAKER + + module Make : MAKER + (** A functor for minting a new storage type as long as it's argument + module implements the {!STORE} interface. *) +end + +module Base = struct + (** general implementation agnostic STORE interface functions. + * To be used as fallback functions for stores that do not + * readily provide implementations for these functions. *) + + module StrSet = Set.Make (String) + + let erase_values ~erase_fn t keys = + StrSet.iter (erase_fn t) @@ StrSet.of_list keys + + let erase_prefix ~list_fn ~erase_fn t pre = + List.iter (fun k -> + if String.starts_with ~prefix:pre k + then begin + erase_fn t k + end) @@ list_fn t + + let list_prefix ~list_fn t pre = + List.filter + (String.starts_with ~prefix:pre) + (list_fn t) + + let list_dir ~list_fn t pre = + let paths = + List.map + (fun k -> + Result.get_ok @@ + Node.of_path @@ + String.cat "/" k) + (list_prefix ~list_fn t pre) + in + let is_prefix_child k = + match Node.parent k with + | Some par -> + String.equal pre @@ Node.to_prefix par + | None -> false in + let keys, rest = + List.partition_map (fun k -> + match is_prefix_child k with + | true -> Either.left @@ Node.to_key k + | false -> Either.right k) + paths + in + let prefixes = + List.fold_left (fun acc k -> + match + List.find_opt + is_prefix_child + (Node.ancestors k) + with + | None -> acc + | Some v -> + let w = Node.to_prefix v in + if List.mem w acc then acc + else w :: acc) + [] rest + in + keys, prefixes + + let rec get_partial_values ~get_fn t kr_pairs = + match kr_pairs with + | [] -> [None] + | (k, r) :: xs -> + match get_fn t k with + | Error _ -> + None :: (get_partial_values ~get_fn t xs) + | Ok v -> + try + let sub = match r with + | ByteRange (rs, None) -> + String.sub v rs @@ String.length v + | ByteRange (rs, Some rl) -> + String.sub v rs rl in + Some sub :: (get_partial_values ~get_fn t xs) + with + | Invalid_argument _ -> + None :: (get_partial_values ~get_fn t xs) + + let rec set_partial_values ~set_fn ~get_fn t = function + | [] -> Ok () + | (k, rs, v) :: xs -> + match get_fn t k with + | Error _ -> + set_fn t k v; + set_partial_values ~set_fn ~get_fn t xs + | Ok ov -> + try + let ov' = Bytes.of_string ov in + String.(length v |> blit v 0 ov' rs); + set_fn t k @@ Bytes.to_string ov'; + set_partial_values ~set_fn ~get_fn t xs + with + | Invalid_argument s -> + Error (`Store_read s) +end