Skip to content

Commit

Permalink
Refactor codecs module.
Browse files Browse the repository at this point in the history
This commit joins multiple files under the codecs directory into a
single `codecs.ml` file
  • Loading branch information
zoj613 committed Dec 22, 2024
1 parent 94822ca commit 884eec4
Show file tree
Hide file tree
Showing 14 changed files with 966 additions and 1,136 deletions.
830 changes: 830 additions & 0 deletions zarr/src/codecs.ml

Large diffs are not rendered by default.

135 changes: 135 additions & 0 deletions zarr/src/codecs.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
(** An array has an associated list of codecs. Each codec specifies a
bidirectional transform (an encode transform and a decode transform).
This module contains building blocks for creating and working with
a chain of codecs. *)

exception Array_to_bytes_invariant
(** Raised when a codec chain contains more than 1 array->bytes codec. *)

exception Invalid_transpose_order
(** Raised when a codec chain contains a Transpose codec with an incorrect order. *)

exception Invalid_sharding_chunk_shape
(** Raised when a codec chain contains a shardingindexed codec with an incorrect inner chunk shape. *)

exception Invalid_codec_ordering
(** Raised when a codec chain has incorrect ordering of codecs, i.e. if the
ordering is not [arraytoarray list -> 1 arraytobytes -> bytestobytes list]. *)

exception Invalid_zstd_level
(** Raised when a codec chain contains a Zstd codec with an incorrect compression value. *)

(** The type of [array -> array] codecs.
[`Transpose o] carries the order array [o] of the transpose codec; an
incorrect order is reported with {!Invalid_transpose_order}. *)
type arraytoarray = [ `Transpose of int array ]

(** A type representing valid Gzip codec compression levels. Only the ten
constructors [L0] to [L9] exist, so no other level can be expressed with
this type. *)
type compression_level = L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9

(** A type representing [bytes -> bytes] codecs that produce
fixed sized encoded strings. *)
type fixed_bytestobytes = [ `Crc32c ]

(** A type representing [bytes -> bytes] codecs that produce
variable sized encoded strings.
[`Gzip l] carries its compression level [l]. [`Zstd (level, flag)] carries an
integer compression level; an incorrect value is reported with
{!Invalid_zstd_level}.
NOTE(review): the meaning of the [bool] in [`Zstd] is not visible in this
interface (presumably a checksum flag) -- confirm against the implementation. *)
type variable_bytestobytes = [ `Gzip of compression_level | `Zstd of int * bool ]

(** The type of [bytes -> bytes] codecs: the union of {!fixed_bytestobytes}
and {!variable_bytestobytes}. *)
type bytestobytes = [ fixed_bytestobytes | variable_bytestobytes ]

(** A type representing the configured endianness of an array:
[LE] for little-endian and [BE] for big-endian. *)
type endianness = LE | BE

(** A type representing the location of a shard's index array in
an encoded byte string: either at the [Start] or at the [End]. *)
type loc = Start | End

(** The type of [array -> bytes] codecs that produce
fixed sized encoded strings. *)
type fixed_arraytobytes = [ `Bytes of endianness ]

(** The type of [array -> bytes] codecs that produce
variable sized encoded strings. This type, {!codec}, {!index_codec} and
{!shard_config} are declared together because a sharding configuration
itself contains lists of codecs. *)
type variable_array_tobytes = [ `ShardingIndexed of shard_config ]
(** The type of any codec that may appear in a codec chain: an
[array -> array] codec, an [array -> bytes] codec (fixed sized or sharding
indexed) or a [bytes -> bytes] codec.
NOTE(review): the [`ShardingIndexed] tag is spelled out here instead of
including {!variable_array_tobytes}; presumably because a type from the same
recursive definition cannot be included in a variant -- confirm before
changing. *)
and codec = [ arraytoarray | fixed_arraytobytes | `ShardingIndexed of shard_config | bytestobytes ]
(** The type of codecs allowed in [index_codecs] of a {!shard_config}. Only
[array -> array] codecs and the fixed sized [array -> bytes] and
[bytes -> bytes] codecs are included; the variable sized codecs are not. *)
and index_codec = [ arraytoarray | fixed_arraytobytes | fixed_bytestobytes ]

(** A type representing the Sharding indexed codec's configuration parameters.
- [chunk_shape] is the inner chunk shape; an incorrect one is reported with
  {!Invalid_sharding_chunk_shape}.
- [codecs] is a list of {!codec} values (presumably the chain applied to each
  inner chunk -- confirm against the implementation).
- [index_codecs] is a list of {!index_codec} values (presumably the chain
  applied to the shard's index array -- confirm against the implementation).
- [index_location] is the location of the shard's index array in the encoded
  byte string, see {!loc}. *)
and shard_config =
{chunk_shape : int array
;codecs : codec list
;index_codecs : index_codec list
;index_location : loc}

(** The type summarizing the decoded/encoded representation of a Zarr array
or chunk: [kind] is its [Ndarray.dtype] and [shape] is its shape. The type
parameter ['a] is the one carried by [kind]. *)
type 'a array_repr = {kind : 'a Ndarray.dtype; shape : int array}

(** A module containing functions to encode/decode an array chunk using a
predefined set of codecs. *)
module Chain : sig
(** A type representing a valid chain of codecs for
decoding/encoding a Zarr array chunk. The type is abstract: values are
obtained with {!create} or {!of_yojson}. *)
type t

(** [create s c] returns a type representing a chain of codecs defined by
chain [c] and chunk shape [s].
@raise Array_to_bytes_invariant
if [c] contains more than one array->bytes codec.
@raise Invalid_transpose_order
if [c] contains a transpose codec with invalid order array.
@raise Invalid_zstd_level
if [c] contains a Zstd codec whose compression level is invalid.
@raise Invalid_sharding_chunk_shape
if [c] contains a shardingindexed codec with an
incorrect inner chunk shape. *)
(* NOTE(review): Invalid_codec_ordering is declared in this interface for
   chains with incorrectly ordered codecs but is not listed above; confirm
   whether [create] can raise it and document it here if so. *)
val create : int array -> codec list -> t

(** [encode t x] computes the encoded byte string representation of
array chunk [x] using codec chain [t]. *)
val encode : t -> 'a Ndarray.t -> string

(** [decode t repr x] decodes the byte string [x] using codec chain [t]
and decoded representation type [repr]. *)
val decode : t -> 'a array_repr -> string -> 'a Ndarray.t

(** [x = y] returns true if chain [x] is equal to chain [y],
and false otherwise. Note that opening this module replaces the
polymorphic [=] with this chain-only version. *)
val ( = ) : t -> t -> bool

(** [of_yojson s x] returns a codec chain of type {!t} from its json
representation [x], or [Error msg] with a message string if no chain can be
built from it.
NOTE(review): [s] is presumably the chunk shape, as in {!create} -- confirm
against the implementation. *)
val of_yojson : int array -> Yojson.Safe.t -> (t, string) result

(** [to_yojson x] returns a json object representation of codec chain [x]. *)
val to_yojson : t -> Yojson.Safe.t
end

(** A functor for generating a Sharding Indexed codec that supports partial
(en/de)coding via IO operations. The IO operations are taken from the
functor argument [Io]. *)
module Make (Io : Types.IO) : sig
(* Opened so that the signatures below can refer to [Deferred.t] unqualified
   (presumably [Deferred] is provided by [Io] -- confirm in [Types.IO]). *)
open Io

(** [is_just_sharding t] is [true] if the codec chain [t] contains only
the [sharding_indexed] codec. *)
val is_just_sharding : Chain.t -> bool

(** Partially encodes values into an already encoded shard through the
supplied IO functions; the result is a deferred unit.
NOTE(review): the roles of the arguments are not visible in this interface
and the reading below must be confirmed against the implementation. In order
they are: the codec chain; a function that presumably reads the byte ranges
given as [(offset, optional length)] pairs; a function that presumably
writes [(offset, bytes)] pairs, with an optional [append] flag; an [int]
that is presumably the current size of the encoded shard; the
representation of the array being encoded; a list that presumably pairs
coordinates with the values to write; and a value that is presumably the
fill value. *)
val partial_encode :
Chain.t ->
((int * int option) list -> string list Deferred.t) ->
(?append:bool -> (int * string) list -> unit Deferred.t) ->
int ->
'a array_repr ->
(int array * 'a) list ->
'a ->
unit Deferred.t

(** Partially decodes values out of an encoded shard through the supplied
IO function; the result is a deferred list of [(int, value)] pairs.
NOTE(review): the roles of the arguments are not visible in this interface
and the reading below must be confirmed against the implementation. In order
they are: the codec chain; a function that presumably reads the byte ranges
given as [(offset, optional length)] pairs; an [int] that is presumably the
size of the encoded shard; the representation of the array being decoded; a
list that presumably pairs an index with the coordinates to read; and a
value that is presumably the fill value. The [int] in each returned pair
presumably matches the [int] of the corresponding request. *)
val partial_decode :
Chain.t ->
((int * int option) list -> string list Deferred.t) ->
int ->
'a array_repr ->
(int * int array) list ->
'a ->
(int * 'a) list Deferred.t
end
72 changes: 0 additions & 72 deletions zarr/src/codecs/array_to_array.ml

This file was deleted.

11 changes: 0 additions & 11 deletions zarr/src/codecs/array_to_array.mli

This file was deleted.

Loading

0 comments on commit 884eec4

Please sign in to comment.