Skip to content

Commit

Permalink
Angstrom_eio
Browse files Browse the repository at this point in the history
  • Loading branch information
SGrondin committed Jun 12, 2023
1 parent c273a94 commit cc4601d
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 98 deletions.
94 changes: 94 additions & 0 deletions src/angstrom_eio/angstrom_eio.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
(*----------------------------------------------------------------------------
Copyright (c) 2023 Inhabited Type LLC.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------*)

open Angstrom.Buffered

let default_buffer_size = 4096

let default_pushback () = ()

let handle_parse_result state =
match state_to_unconsumed state with
| None -> assert false
| Some us -> us, state_to_result state

let finalize = function
| Partial feed -> feed `Eof
| state -> state

let parse ?(pushback = default_pushback) parser src =
let buf = Cstruct.create default_buffer_size in
let rec loop = function
| (Done _ as state)
| (Fail _ as state) ->
handle_parse_result state
| Partial feed as state -> (
match src#read_into buf with
| 0
| (exception End_of_file) ->
finalize state |> handle_parse_result
| len ->
let next = feed (`Bigstring (Bigstringaf.sub buf.buffer ~off:0 ~len)) in
pushback ();
loop next )
in
loop (parse parser)

let rec buffered_state_loop pushback state src (buf : Cstruct.t) =
match state with
| Partial k ->
let next =
match src#read_into buf with
| 0
| (exception End_of_file) ->
k `Eof
| len -> k (`Bigstring (Bigstringaf.sub buf.buffer ~off:0 ~len))
in
pushback ();
buffered_state_loop pushback next src buf
| state -> state

let with_buffered_parse_state ?(pushback = default_pushback) state src =
let buf = Cstruct.create default_buffer_size in
( match state with
| Partial _ -> buffered_state_loop pushback state src buf
| _ -> state )
|> handle_parse_result

let async_many e k = Angstrom.(skip_many (e <* commit >>| k) <?> "async_many")

let parse_many p write src =
let wait = ref (default_pushback ()) in
let k x = wait := write x in
let pushback () = !wait in
parse ~pushback (async_many p k) src
68 changes: 68 additions & 0 deletions src/angstrom_eio/angstrom_eio.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
(*----------------------------------------------------------------------------
Copyright (c) 2016 Inhabited Type LLC.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------*)

open Angstrom

val parse
: ?pushback:(unit -> unit)
-> 'a t
-> #Eio.Flow.source
-> (Buffered.unconsumed * ('a, string) result)

val parse_many
: 'a t
-> ('a -> unit)
-> #Eio.Flow.source
-> (Buffered.unconsumed * (unit, string) result)

(** Useful for resuming a {!parse} that returns unconsumed data. Construct a
[Buffered.state] by using [Buffered.parse] and provide it into this
function. This is essentially what {!parse_many} does, so consider using
that if you don't require fine-grained control over how many times you want
the parser to succeed.
Usage example:
{[
match parse parser flow with
| { buf; off; len }, Ok a ->
let state = Buffered.parse parser in
let state = Buffered.feed state (`Bigstring (Bigstringaf.sub ~off ~len buf)) in
with_buffered_parse_state state flow
| Error err -> failwith err
]} *)
val with_buffered_parse_state
: ?pushback:(unit -> unit)
-> 'a Buffered.state
-> #Eio.Flow.source
-> (Buffered.unconsumed * ('a, string) result)
10 changes: 10 additions & 0 deletions src/angstrom_eio/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
(library
(name angstrom_eio)
(libraries
angstrom
eio_main
)
(preprocess (pps
ppx_jane
))
)
102 changes: 8 additions & 94 deletions src/parsing/basic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -150,103 +150,17 @@ let exec_parser ~on_ok ?on_error parser ~path ~language_name raw =
| None -> default_syntax_error_handler ~path ~language_name ~msg
| Some handler -> handler ~msg )

type 'a status =
| Incomplete
| Success of 'a
| Partial of {
partial: 'a;
unparsed: string;
}
| Failed of {
message: string;
unparsed: string;
}
| Not_enough_input

class virtual ['a] parser_sink =
object
inherit Eio.Flow.sink

method virtual data : 'a status
end

let parser_sink (type a) (parser : a Angstrom.t) : a parser_sink =
let data = ref Incomplete in
let to_status = function
| Angstrom.Buffered.Done ({ len = 0; _ }, x) -> Success x
| Angstrom.Buffered.Done ({ buf; off; len }, x) ->
Partial { partial = x; unparsed = Bigstringaf.substring buf ~off ~len }
| Angstrom.Buffered.Fail ({ buf; off; len }, marks, s) ->
let message = sprintf "%s (%s)" s (String.concat ~sep:", " marks) in
Failed { message; unparsed = Bigstringaf.substring buf ~off ~len }
| Angstrom.Buffered.Partial _feed -> Not_enough_input
in
let finalize state =
let final =
match state with
| (Angstrom.Buffered.Done _ as acc)
|(Angstrom.Buffered.Fail _ as acc) ->
acc
| Angstrom.Buffered.Partial feed -> feed `Eof
in
data := to_status final
in
object
inherit Eio.Flow.sink

method copy src =
let buf = Cstruct.create Utils.Io.parser_buffer_size in
let rec loop = function
| (Angstrom.Buffered.Done _ as state)
|(Angstrom.Buffered.Fail _ as state) ->
finalize state
| Angstrom.Buffered.Partial feed as state -> (
try
let got = src#read_into buf in
loop (feed (`Bigstring (Bigstringaf.sub buf.buffer ~off:0 ~len:got)))
with
| End_of_file -> finalize state )
in
loop (Angstrom.Buffered.parse parser)

method! write bufs =
List.fold_until bufs ~finish:Fn.id ~init:(Angstrom.Buffered.parse parser)
~f:(fun acc { buffer; off; len } ->
match acc with
| Angstrom.Buffered.Done _
|Angstrom.Buffered.Fail _ ->
Stop acc
| Angstrom.Buffered.Partial feed ->
Continue (feed (`Bigstring (Bigstringaf.sub buffer ~off ~len))) )
|> finalize

method data = !data
end

let to_cstructs flow =
let q = Queue.create () in
let rec loop () =
let buf = Cstruct.create Utils.Io.parser_buffer_size in
Queue.enqueue q buf;
match Eio.Flow.read_exact flow buf with
| () -> loop ()
| exception End_of_file -> ()
in
loop ();
Queue.to_list q

let exec_parser_eio ~on_ok ?on_error parser ~path ~language_name source =
let sink = parser_sink parser in
Eio.Flow.copy source sink;
(* Eio.Flow.write sink (to_cstructs source); *)
let get_handler () =
match on_error with
| None -> default_error_handler ~path ~language_name
| Some x -> x
in
match sink#data with
| Success x -> on_ok x
| Incomplete -> (get_handler ()) ~msg:"Incomplete. Please report this bug." ()
| Not_enough_input -> (get_handler ()) ~msg:"Not enough input." ()
| Partial { unparsed; _ } -> (get_handler ()) ~unparsed ~msg:"" ()
| Failed { message = msg; unparsed } -> (get_handler ()) ~unparsed ~msg ()
match Angstrom_eio.parse parser source with
| { len = 0; _ }, Ok x -> on_ok x
| { buf; len; off }, Ok _ ->
(get_handler ())
~unparsed:(Bigstringaf.substring buf ~off ~len)
~msg:"Not all input could be processed. There must be invalid syntax." ()
| { buf; len; off }, Error msg ->
(get_handler ()) ~unparsed:(Bigstringaf.substring buf ~off ~len) ~msg ()
1 change: 1 addition & 0 deletions src/parsing/dune
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
SZXX

utils
angstrom_eio
flow_parser
)
(preprocess (pps
Expand Down
4 changes: 0 additions & 4 deletions src/utils/io.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@ open Eio.Std

let flags = `Or_truncate 0o644

let parser_chunk_size = 4096

let parser_buffer_size = parser_chunk_size * 4

let num_threads = 4

let pool = Domainslib.Task.setup_pool ~num_domains:num_threads ()
Expand Down

0 comments on commit cc4601d

Please sign in to comment.