From 04d12158ab8db5fb4159bc4ec1e3ec42ca702815 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Fri, 3 Nov 2023 14:52:29 +0100 Subject: [PATCH 1/5] Support eio>=0.12 --- dune-project | 2 +- eio/tar_eio.ml | 16 ++++++++-------- eio/tar_eio.mli | 18 +++++++++--------- tar-eio.opam | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/dune-project b/dune-project index 3e43e53..7446827 100644 --- a/dune-project +++ b/dune-project @@ -91,7 +91,7 @@ (tags ("org:xapi-project" "org:mirage")) (depends (ocaml (>= 4.08.0)) - (eio (and (>= 0.10.0) (< 0.12))) + (eio (>= 0.12)) (tar (= :version)) ) ) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index b1d2c7f..18c1321 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -25,7 +25,7 @@ module Monad = struct end module Reader = struct - type in_channel = Flow.source + type in_channel = Flow.source_ty Resource.t type 'a t = 'a let really_read f b = Flow.read_exact f b let skip f (n: int) = @@ -43,7 +43,7 @@ end let really_read = Reader.really_read module Writer = struct - type out_channel = Flow.sink + type out_channel = Flow.sink_ty Resource.t type 'a t = 'a let really_write f b = Flow.write f [ b ] end @@ -66,7 +66,7 @@ module HR = Tar.HeaderReader(Monad)(Reader) module HW = Tar.HeaderWriter(Monad)(Writer) let get_next_header ?level ~global ic = - match HR.read ?level ~global (ic :> Flow.source) with + match HR.read ?level ~global (ic :> Eio.Flow.source_ty Eio.Flow.source) with | Error `Eof -> None | Ok hdrs -> Some hdrs @@ -95,8 +95,8 @@ let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ?uname ?gname ~devmajor ~devminor (snd filepath) file_size -let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink = - HW.write ?level ?global header (sink :> Flow.sink); +let write_block ?level ?global (header: Tar.Header.t) body sink = + HW.write ?level ?global header (sink :> Eio.Flow.sink_ty Eio.Flow.sink); body sink; really_write sink (Tar.Header.zero_padding header) @@ -111,7 +111,7 @@ module Archive = struct should leave the fd positioned immediately after the datablock. Finally the function skips past the zero padding to the next header *) let with_next_file src ~(global: Tar.Header.Extended.t option) - (f: Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) = + (f: _ -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) = match get_next_header ~global src with | Some (hdr, global) -> let result = f src global hdr in @@ -123,7 +123,7 @@ module Archive = struct (** List the contents of a tar *) let list ?level fd = let rec loop global acc = - match get_next_header ?level ~global (fd :> Flow.source) with + match get_next_header ?level ~global fd with | None -> List.rev acc | Some (hdr, global) -> Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size); @@ -145,7 +145,7 @@ module Archive = struct in loop None () - let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) = + let transform ?level f ifd ofd = let rec loop global () = match get_next_header ~global ifd with | None -> () diff --git a/eio/tar_eio.mli b/eio/tar_eio.mli index c604b7d..d967591 100644 --- a/eio/tar_eio.mli +++ b/eio/tar_eio.mli @@ -20,7 +20,7 @@ zero-filled blocks are discovered. Assumes stream is positioned at the possible start of a header block. @raise End_of_file if the stream unexpectedly fails. *) -val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Eio.Flow.source -> +val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> 'a Eio.Flow.source -> (Tar.Header.t * Tar.Header.Extended.t option) option (** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional @@ -30,7 +30,7 @@ val header_of_file : ?level:Tar.Header.compatibility -> ?getpwuid:(int64 -> string) -> ?getgrgid:(int64 -> string) -> - Eio.Fs.dir Eio.Path.t -> + Eio.Fs.dir_ty Eio.Path.t -> Tar.Header.t module Archive : sig @@ -39,23 +39,23 @@ module Archive : sig (** Read the next header, apply the function 'f' to the source and the header. The function should leave the source positioned immediately after the datablock. Finally the function skips past the zero padding to the next header. *) - val with_next_file : Eio.Flow.source -> global:Tar.Header.Extended.t option -> - (Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option + val with_next_file : 'a Eio.Flow.source -> global:Tar.Header.Extended.t option -> + ('a Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option (** List the contents of a tar to stdout. *) - val list : ?level:Tar.Header.compatibility -> #Eio.Flow.source -> Tar.Header.t list + val list : ?level:Tar.Header.compatibility -> 'a Eio.Flow.source -> Tar.Header.t list (** [extract dest] extract the contents of a tar. Apply [dest] on each source filename to change the destination filename. It only supports extracting regular files from the top-level of the archive. *) - val extract : (string -> Eio.Fs.dir Eio.Path.t) -> Eio.Flow.source -> unit + val extract : (string -> [> Eio.Fs.dir_ty] Eio.Path.t) -> 'a Eio.Flow.source -> unit (** [transform f src sink] applies [f] to the header of each file in the tar inputted in [src], and writes the resulting headers to [sink] preserving the content and structure of the archive. *) - val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> #Eio.Flow.source -> #Eio.Flow.sink -> unit + val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> 'a Eio.Flow.source -> 'a Eio.Flow.sink -> unit (** Create a tar in the sink from a list of file paths. It only supports regular files. @@ -63,7 +63,7 @@ module Archive : sig val create : ?getpwuid:(int64 -> string) -> ?getgrgid:(int64 -> string) -> - Eio.Fs.dir Eio.Path.t list -> - #Eio.Flow.sink -> + [> Eio.Fs.dir_ty ] Eio.Path.t list -> + 'a Eio.Flow.sink -> unit end diff --git a/tar-eio.opam b/tar-eio.opam index 697f742..55142dc 100644 --- a/tar-eio.opam +++ b/tar-eio.opam @@ -15,7 +15,7 @@ bug-reports: "https://github.com/mirage/ocaml-tar/issues" depends: [ "dune" {>= "2.9"} "ocaml" {>= "4.08.0"} - "eio" {>= "0.10.0" & < "0.12"} + "eio" {>= "0.12"} "tar" {= version} "odoc" {with-doc} ] From 24ba3bebbb53739b55c030a18d51b9274ea53c90 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Sat, 4 Nov 2023 18:02:54 +0100 Subject: [PATCH 2/5] Add tar-eio.gz --- eio/dune | 7 +++++++ eio/tar_eio_gz.ml | 33 ++++++++++++++++++++++++++++++ eio/tar_eio_gz.mli | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 eio/tar_eio_gz.ml create mode 100644 eio/tar_eio_gz.mli diff --git a/eio/dune b/eio/dune index 0ec6d2e..35ac558 100644 --- a/eio/dune +++ b/eio/dune @@ -1,4 +1,11 @@ (library (name tar_eio) (public_name tar-eio) + (modules tar_eio) (libraries tar eio)) + +(library + (name tar_eio_gz) + (public_name tar-eio.gz) + (modules tar_eio_gz) + (libraries tar.gz tar_eio)) diff --git a/eio/tar_eio_gz.ml b/eio/tar_eio_gz.ml new file mode 100644 index 0000000..cf86f19 --- /dev/null +++ b/eio/tar_eio_gz.ml @@ -0,0 +1,33 @@ +open Eio + +module Monad = struct + type 'a t = 'a + let (>>=) a f = f a + let return = Fun.id +end + +module Reader = struct + type in_channel = Flow.source_ty Resource.t + type 'a t = 'a + let read = Flow.single_read + let really_read f b = Flow.read_exact f b + let skip f (n: int) = + let buffer_size = 32768 in + let buffer = Cstruct.create buffer_size in + let rec loop (n: int) = + if n <= 0 then () + else + let amount = min n buffer_size in + let block = Cstruct.sub buffer 0 amount in + really_read f block; + loop (n - amount) in + loop n +end + +module Writer = struct + type out_channel = Flow.sink_ty Resource.t + type 'a t = 'a + let really_write f b = Flow.write f [ b ] +end + +include Tar_gz.Make(Monad)(Writer)(Reader) diff --git a/eio/tar_eio_gz.mli b/eio/tar_eio_gz.mli new file mode 100644 index 0000000..dc46987 --- /dev/null +++ b/eio/tar_eio_gz.mli @@ -0,0 +1,51 @@ +open Eio + +(** Read tar.gz files with eio *) + +type in_channel + +val of_in_channel : internal:Cstruct.t -> Flow.source_ty Resource.t -> in_channel + +(** Returns the next header block or fails with {!Tar.Header.End_of_stream} + if two consecutive zero-filled blocks are discovered. Assumes stream is + positioned at the possible start of a header block. + + @raise Stdlib.End_of_file if the stream unexpectedly fails. *) +val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option + -> in_channel -> (Tar.Header.t * Tar.Header.Extended.t option) + +val really_read : in_channel -> Cstruct.t -> unit +(** [really_read fd buf] fills [buf] with data from [fd] or raises + {!Stdlib.End_of_file}. *) + +val skip : in_channel -> int -> unit + +type out_channel + +val of_out_channel : ?bits:int -> ?q:int -> level:int -> + mtime:int32 -> Gz.os -> Flow.sink_ty Resource.t -> out_channel + +val write_block : ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended.t -> + Tar.Header.t -> out_channel -> (unit -> string option) -> unit +(** [write_block hdr oc stream] writes [hdr], then {i deflate} the given + [stream], then zero-pads so the stream is positionned for the next + block. + + A simple usage to write a file: + {[ + let stream_of_fd fd = + let buf = Bytes.create 0x1000 in + fun () -> match Unix.read fd buf 0 (Bytes.length buf) with + | 0 -> None + | len -> Some (Bytes.sub_string buf 0 len) + | exception End_of_file -> None + + let add_file oc filename = + let fd = Unix.openfile filename Unix.[ O_RDONLY ] 0o644 in + let hdr = Tar.Header.make ... in + write_block hdr oc (stream_of_fd fd) ; + Unix.close fd + ]} *) + +val write_end : out_channel -> unit +(** [write_end oc] writes a stream terminator to [oc]. *) From 26a561e56f6b1204bcc5451533e208e2692f60f0 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Sun, 5 Nov 2023 10:22:12 +0100 Subject: [PATCH 3/5] Add a fold function to tar_eio_gz --- eio/tar_eio_gz.ml | 37 +++++++++++++++++++++++++----- eio/tar_eio_gz.mli | 56 +++++++++++++++++++++++++++++++--------------- 2 files changed, 70 insertions(+), 23 deletions(-) diff --git a/eio/tar_eio_gz.ml b/eio/tar_eio_gz.ml index cf86f19..77e09dc 100644 --- a/eio/tar_eio_gz.ml +++ b/eio/tar_eio_gz.ml @@ -2,32 +2,59 @@ open Eio module Monad = struct type 'a t = 'a - let (>>=) a f = f a + + let ( >>= ) a f = f a let return = Fun.id end module Reader = struct type in_channel = Flow.source_ty Resource.t type 'a t = 'a + let read = Flow.single_read let really_read f b = Flow.read_exact f b - let skip f (n: int) = + + let skip f (n : int) = let buffer_size = 32768 in let buffer = Cstruct.create buffer_size in - let rec loop (n: int) = + let rec loop (n : int) = if n <= 0 then () else let amount = min n buffer_size in let block = Cstruct.sub buffer 0 amount in really_read f block; - loop (n - amount) in + loop (n - amount) + in loop n end module Writer = struct type out_channel = Flow.sink_ty Resource.t type 'a t = 'a + let really_write f b = Flow.write f [ b ] end -include Tar_gz.Make(Monad)(Writer)(Reader) +include Tar_gz.Make (Monad) (Writer) (Reader) + +type source = in_channel +type sink = out_channel + +let of_sink ?bits ?q ~level ~mtime os f = + of_out_channel ?bits ?q ~level ~mtime os (f :> Flow.sink_ty Resource.t) + +let of_source f = + of_in_channel ~internal:(Cstruct.create 65536) + (f :> Flow.source_ty Eio.Resource.t) + +let fold ?level f source init = + let rec aux global acc = + match get_next_header ?level ~global source with + | hdr, global -> + let acc = f hdr acc in + let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in + skip source to_skip; + aux global acc + | exception Tar.Header.End_of_stream -> acc + in + aux None init diff --git a/eio/tar_eio_gz.mli b/eio/tar_eio_gz.mli index dc46987..b5853d4 100644 --- a/eio/tar_eio_gz.mli +++ b/eio/tar_eio_gz.mli @@ -1,35 +1,55 @@ -open Eio - (** Read tar.gz files with eio *) -type in_channel +open Eio -val of_in_channel : internal:Cstruct.t -> Flow.source_ty Resource.t -> in_channel +type source -(** Returns the next header block or fails with {!Tar.Header.End_of_stream} - if two consecutive zero-filled blocks are discovered. Assumes stream is +val of_source : 'a Flow.source -> source + +val get_next_header : + ?level:Tar.Header.compatibility -> + global:Tar.Header.Extended.t option -> + source -> + Tar.Header.t * Tar.Header.Extended.t option +(** Returns the next header block or fails with {!Tar.Header.End_of_stream} if + two consecutive zero-filled blocks are discovered. Assumes stream is positioned at the possible start of a header block. @raise Stdlib.End_of_file if the stream unexpectedly fails. *) -val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option - -> in_channel -> (Tar.Header.t * Tar.Header.Extended.t option) -val really_read : in_channel -> Cstruct.t -> unit +val really_read : source -> Cstruct.t -> unit (** [really_read fd buf] fills [buf] with data from [fd] or raises {!Stdlib.End_of_file}. *) -val skip : in_channel -> int -> unit +val fold : + ?level:Tar.Header.compatibility -> + (Tar.Header.t -> 'a -> 'a) -> + source -> + 'a -> + 'a + +val skip : source -> int -> unit -type out_channel +type sink -val of_out_channel : ?bits:int -> ?q:int -> level:int -> - mtime:int32 -> Gz.os -> Flow.sink_ty Resource.t -> out_channel +val of_sink : + ?bits:int -> + ?q:int -> + level:int -> + mtime:int32 -> + Gz.os -> + 'a Flow.sink -> + sink -val write_block : ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended.t -> - Tar.Header.t -> out_channel -> (unit -> string option) -> unit +val write_block : + ?level:Tar.Header.compatibility -> + ?global:Tar.Header.Extended.t -> + Tar.Header.t -> + sink -> + (unit -> string option) -> + unit (** [write_block hdr oc stream] writes [hdr], then {i deflate} the given - [stream], then zero-pads so the stream is positionned for the next - block. + [stream], then zero-pads so the stream is positionned for the next block. A simple usage to write a file: {[ @@ -47,5 +67,5 @@ val write_block : ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended Unix.close fd ]} *) -val write_end : out_channel -> unit +val write_end : sink -> unit (** [write_end oc] writes a stream terminator to [oc]. *) From c3455a186bb89762dd72c695fb490b0ec6299048 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Sun, 5 Nov 2023 10:31:44 +0100 Subject: [PATCH 4/5] Avoid an allocation on every skip Not sure it's thread safe but nobody is reading in that buffer anyway. Would be better do have Flow.skip? --- eio/tar_eio_gz.ml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/eio/tar_eio_gz.ml b/eio/tar_eio_gz.ml index 77e09dc..641e36d 100644 --- a/eio/tar_eio_gz.ml +++ b/eio/tar_eio_gz.ml @@ -13,15 +13,14 @@ module Reader = struct let read = Flow.single_read let really_read f b = Flow.read_exact f b + let buffer_null = Cstruct.create 65536 let skip f (n : int) = - let buffer_size = 32768 in - let buffer = Cstruct.create buffer_size in let rec loop (n : int) = if n <= 0 then () else - let amount = min n buffer_size in - let block = Cstruct.sub buffer 0 amount in + let amount = min n (Cstruct.length buffer_null) in + let block = Cstruct.sub buffer_null 0 amount in really_read f block; loop (n - amount) in From a7999fc339ccc58b74c805344124e00e15777766 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Tue, 7 Nov 2023 23:14:18 +0100 Subject: [PATCH 5/5] Better implementation for fold --- eio/tar_eio_gz.ml | 45 +++++++++++++++++++++++++-------------------- eio/tar_eio_gz.mli | 5 ++++- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/eio/tar_eio_gz.ml b/eio/tar_eio_gz.ml index 641e36d..5e3aea5 100644 --- a/eio/tar_eio_gz.ml +++ b/eio/tar_eio_gz.ml @@ -8,23 +8,12 @@ module Monad = struct end module Reader = struct - type in_channel = Flow.source_ty Resource.t + type in_channel = Buf_read.t type 'a t = 'a - let read = Flow.single_read - let really_read f b = Flow.read_exact f b - let buffer_null = Cstruct.create 65536 - - let skip f (n : int) = - let rec loop (n : int) = - if n <= 0 then () - else - let amount = min n (Cstruct.length buffer_null) in - let block = Cstruct.sub buffer_null 0 amount in - really_read f block; - loop (n - amount) - in - loop n + let read f = Flow.single_read (Buf_read.as_flow f) + let really_read f b = Flow.read_exact (Buf_read.as_flow f) b + let skip = Buf_read.consume end module Writer = struct @@ -44,15 +33,31 @@ let of_sink ?bits ?q ~level ~mtime os f = let of_source f = of_in_channel ~internal:(Cstruct.create 65536) - (f :> Flow.source_ty Eio.Resource.t) + (Buf_read.of_flow ~max_size:max_int f) -let fold ?level f source init = +type filter = [ `Skip | `Header | `Header_and_file ] + +let fold ?level ?(filter = fun _ -> `Header) f source init = let rec aux global acc = match get_next_header ?level ~global source with | hdr, global -> - let acc = f hdr acc in - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - skip source to_skip; + let size = Int64.to_int hdr.file_size in + let padding = Tar.Header.compute_zero_padding_length hdr in + let acc = + match (filter hdr : filter) with + | `Skip -> + skip source (size + padding); + acc + | `Header -> + skip source (size + padding); + f hdr (Eio.Flow.string_source "") acc + | `Header_and_file -> + let buf = Cstruct.create size in + let src = Eio.Flow.cstruct_source [ buf ] in + really_read source buf; + skip source padding; + f hdr src acc + in aux global acc | exception Tar.Header.End_of_stream -> acc in diff --git a/eio/tar_eio_gz.mli b/eio/tar_eio_gz.mli index b5853d4..7270239 100644 --- a/eio/tar_eio_gz.mli +++ b/eio/tar_eio_gz.mli @@ -21,9 +21,12 @@ val really_read : source -> Cstruct.t -> unit (** [really_read fd buf] fills [buf] with data from [fd] or raises {!Stdlib.End_of_file}. *) +type filter = [ `Skip | `Header | `Header_and_file ] + val fold : ?level:Tar.Header.compatibility -> - (Tar.Header.t -> 'a -> 'a) -> + ?filter:(Tar.Header.t -> filter) -> + (Tar.Header.t -> Flow.source_ty Resource.t -> 'a -> 'a) -> source -> 'a -> 'a