From 5d443ebcfa028c9bcb8a46bcbbf177bc2d3cf997 Mon Sep 17 00:00:00 2001 From: Bikal Lem Date: Sat, 21 Aug 2021 10:20:22 +0100 Subject: [PATCH] add parts function --- .ocamlformat | 3 +- CHANGES.md | 4 ++ dune-project | 2 +- http-multipart-formdata.opam | 2 +- http-multipart-formdata.opam.locked | 87 ++++++++++++++++++++++++++++ lib/http_multipart_formdata.ml | 59 ++++++++++++++++--- lib/http_multipart_formdata.mli | 41 ++++++++++++-- test/test.ml | 88 +++++++++++++++++++++++++++++ 8 files changed, 270 insertions(+), 16 deletions(-) create mode 100644 http-multipart-formdata.opam.locked diff --git a/.ocamlformat b/.ocamlformat index 3017ad7..9091dc3 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1,7 +1,8 @@ profile = ocamlformat +exp-grouping=preserve module-item-spacing=compact sequence-blank-line=preserve-one single-case=compact break-cases = fit +break-infix=fit-or-vertical parse-docstrings = true - diff --git a/CHANGES.md b/CHANGES.md index 2a60eec..f222533 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +## v3.1.0 2021-08-21 + +- Add `parts` function to allow non-streaming parsing of multiparts. + ## v3.0.1 2021-07-24 - Improve documentation, fix some typos in code. Rename `read_result` to `read` and `read_part` to `read diff --git a/dune-project b/dune-project index 6d12619..ecea1c9 100644 --- a/dune-project +++ b/dune-project @@ -4,7 +4,7 @@ (generate_opam_files true) -(version 3.0.1) +(version 3.1.0) (source (github lemaetech/http-mutlipart-formdata)) diff --git a/http-multipart-formdata.opam b/http-multipart-formdata.opam index c332d55..c072d63 100644 --- a/http-multipart-formdata.opam +++ b/http-multipart-formdata.opam @@ -1,6 +1,6 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -version: "3.0.1" +version: "3.1.0" synopsis: "Http multipart/formdata parser" description: "OCaml implementation of RFC 7578 (Returning Values from Forms: multipart/form-data)- https://tools.ietf.org/html/rfc7578" diff --git a/http-multipart-formdata.opam.locked b/http-multipart-formdata.opam.locked new file mode 100644 index 0000000..73f9586 --- /dev/null +++ b/http-multipart-formdata.opam.locked @@ -0,0 +1,87 @@ +opam-version: "2.0" +name: "http-multipart-formdata" +version: "3.1.0" +synopsis: "Http multipart/formdata parser" +description: + "OCaml implementation of RFC 7578 (Returning Values from Forms: multipart/form-data)- https://tools.ietf.org/html/rfc7578" +maintainer: "Bikal Lem" +authors: "Bikal Lem, " +license: "MPL-2.0" +tags: ["http" "multipart" "formadata" "form" "web"] +homepage: "https://github.com/lemaetech/http-mutlipart-formdata" +bug-reports: "https://github.com/lemaetech/http-mutlipart-formdata/issues" +depends: [ + "angstrom" {= "0.15.0"} + "astring" {= "0.8.5" & with-doc} + "base" {= "v0.14.1" & with-test} + "base-bigarray" {= "base"} + "base-threads" {= "base"} + "base-unix" {= "base"} + "bigarray-compat" {= "1.0.0"} + "bigstringaf" {= "0.8.0"} + "cmdliner" {= "1.0.4"} + "conf-pkg-config" {= "2"} + "cppo" {= "1.6.7" & with-test} + "csexp" {= "1.5.1" & with-test} + "cstruct" {= "6.0.1"} + "dune" {= "2.9.0"} + "dune-configurator" {= "2.9.0" & with-test} + "fmt" {= "0.8.9"} + "fpath" {= "0.7.3" & with-doc} + "jane-street-headers" {= "v0.14.0" & with-test} + "jst-config" {= "v0.14.0" & with-test} + "logs" {= "0.7.0" & with-doc} + "ocaml" {= "4.12.0"} + "ocaml-base-compiler" {= "4.12.0"} + "ocaml-compiler-libs" {= "v0.12.3" & with-test} + "ocaml-config" {= "2"} + "ocaml-migrate-parsetree" {= "2.2.0" & with-test} + "ocaml-options-vanilla" {= "1"} + "ocaml-syntax-shims" {= "1.0.0"} + "ocamlbuild" {= "0.14.0"} + "ocamlfind" {= "1.9.1"} + "octavius" {= "1.2.2" & with-test} + "odoc" {= "dev" & with-doc} + "odoc-parser" {= "0.9.0" & with-doc} + "ppx_assert" {= "v0.14.0" & with-test} + "ppx_base" {= "v0.14.0" & with-test} + "ppx_cold" {= "v0.14.0" & with-test} + "ppx_compare" {= "v0.14.0" & with-test} + "ppx_derivers" {= "1.2.1" & with-test} + "ppx_deriving" {= "5.2.1" & with-test} + "ppx_enumerate" {= "v0.14.0" & with-test} + "ppx_expect" {= "v0.14.1" & with-test} + "ppx_hash" {= "v0.14.0" & with-test} + "ppx_here" {= "v0.14.0" & with-test} + "ppx_inline_test" {= "v0.14.1" & with-test} + "ppx_js_style" {= "v0.14.1" & with-test} + "ppx_optcomp" {= "v0.14.3" & with-test} + "ppx_sexp_conv" {= "v0.14.3" & with-test} + "ppxlib" {= "0.22.2" & with-test} + "re" {= "1.9.0" & with-test} + "result" {= "1.5"} + "seq" {= "base"} + "sexplib0" {= "v0.14.0" & with-test} + "stdio" {= "v0.14.0" & with-test} + "stdlib-shims" {= "0.3.0"} + "time_now" {= "v0.14.0" & with-test} + "topkg" {= "1.0.3"} + "tyxml" {= "4.5.0" & with-doc} + "uchar" {= "0.0.2" & with-doc} + "uutf" {= "1.0.2" & with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/lemaetech/http-mutlipart-formdata.git" \ No newline at end of file diff --git a/lib/http_multipart_formdata.ml b/lib/http_multipart_formdata.ml index dd7acf8..dab7b8e 100644 --- a/lib/http_multipart_formdata.ml +++ b/lib/http_multipart_formdata.ml @@ -50,6 +50,10 @@ and part_header = ; filename: string option ; parameters: string Map.t } +and field_name = string + +and part_body = string + type part_body_header = | Content_type of {ty: string; subtype: string; parameters: string Map.t} | Content_disposition of string Map.t @@ -162,7 +166,8 @@ let boundary content_type = let* params = skip_many ws *> (string_ci "multipart/form-data" "Not multipart formdata header") - *> skip_many ws *> many param + *> skip_many ws + *> many param in match List.assoc_opt "boundary" params with | Some boundary -> return (Boundary boundary) @@ -173,7 +178,9 @@ let boundary content_type = let content_disposition = let+ params = string_ci "Content-Disposition:" - *> skip_many ws *> string_ci "form-data" *> many param + *> skip_many ws + *> string_ci "form-data" + *> many param in let params = List.to_seq params |> Map.of_seq in Content_disposition params @@ -186,7 +193,8 @@ let preamble dash_boundary = many (let* dash_boundary' = peek_string len in if String.equal dash_boundary dash_boundary' then fail "" else any_char ) - *> advance len *> commit + *> advance len + *> commit let crlf = string_ci "\r\n" "[crlf]" @@ -294,7 +302,7 @@ let of_bigarray = Cstruct.of_bigarray let rec read (reader : reader) = match reader.parser_state with - | Buffered.Partial k -> ( + | Buffered.Partial k -> begin match reader.input with | `Incremental -> let continue (input : [`Cstruct of Cstruct.t | `Eof]) = @@ -310,11 +318,13 @@ let rec read (reader : reader) = `Awaiting_input continue | `Cstruct i -> let input' = - if Cstruct.len i = 0 then `Eof else `Bigstring (Cstruct.to_bigarray i) + if Cstruct.length i = 0 then `Eof + else `Bigstring (Cstruct.to_bigarray i) in reader.parser_state <- k input' ; - read reader ) - | Buffered.Done (buf, x) -> ( + read reader + end + | Buffered.Done (buf, x) -> begin match x with | `End -> reader.unconsumed <- of_bigarray ~off:buf.off ~len:buf.len buf.buf ; @@ -328,13 +338,46 @@ let rec read (reader : reader) = x | `Incremental -> reader.unconsumed <- of_bigarray ~off:buf.off ~len:buf.len buf.buf ; - x ) ) + x ) + end | Buffered.Fail (buf, marks, err) -> reader.unconsumed <- of_bigarray ~off:buf.off ~len:buf.len buf.buf ; `Error (String.concat " > " marks ^ ": " ^ err) let unconsumed reader = reader.unconsumed +(* Non streaming *) + +let parts boundary body = + let rec read_parts reader parts = + read reader + |> function + | `End -> + Queue.to_seq parts + |> List.of_seq + |> List.map (fun (header, body) -> + let field_name = name header in + (field_name, (header, body)) ) + |> Result.ok + | `Header header -> + let body = Cstruct.(read_body reader empty |> to_string) in + Queue.push (header, body) parts ; + read_parts reader parts + | `Error e -> Error e + | _ -> assert false + and read_body reader body = + read reader + |> function + | `Body_end -> body + | `Body buf -> read_body reader (Cstruct.append body buf) + | `Error e -> failwith e + | _ -> assert false + in + let reader = + reader ~read_buffer_size:10 boundary (`Cstruct (Cstruct.of_string body)) + in + read_parts reader (Queue.create ()) + (* Pretty Printers *) let pp_boundary fmt (Boundary boundary) = Fmt.string fmt boundary diff --git a/lib/http_multipart_formdata.mli b/lib/http_multipart_formdata.mli index 40c64ca..14eca06 100644 --- a/lib/http_multipart_formdata.mli +++ b/lib/http_multipart_formdata.mli @@ -18,7 +18,7 @@ The parser implements HTTP [multipart/form-data] standard as defined in {{:https://tools.ietf.org/html/rfc7578} RFC 7578}. *) -(** {2 Types} *) +(** {1 Types} *) (** [reader] represents a HTTP multipart formdata reader. *) type reader @@ -46,7 +46,13 @@ and part_header (** Represents the multipart boundary value. *) and boundary -(** {2 Mulipart Boundary parser} *) +(** A form field name *) +and field_name = string + +(** A Multipart body *) +and part_body = string + +(** {1 Mulipart Boundary parser} *) val boundary : string -> (boundary, string) result (** [boundary content_type] parses [content_type] to extract {!type:boundary} @@ -60,7 +66,11 @@ val boundary : string -> (boundary, string) result Http_multipart_formdata.boundary content_type ]} *) -(** {2 Multipart Reader} *) +(** {1 Streaming Multipart} + + API to stream multipart parts. Use these functions when you have to handle + HTTP form submissions which has large file uploads and at the same time be + memory efficient. *) val reader : ?read_buffer_size:int -> boundary -> input -> reader (** [reader ?read_buffer_size boundary input] creates reader. The default value @@ -73,7 +83,28 @@ val unconsumed : reader -> Cstruct.t (** [unconsumed reader] returns any leftover data still remaining after {!type:reader} returns [`End]. *) -(** {2 Part header} *) +(** {1 Non-Streaming Multipart} + + Use these functions if the HTTP form submission is of a relatively small + size. *) + +val parts : + boundary + -> string + -> ((field_name * (part_header * part_body)) list, string) result +(** [parts boundary http_body] returns a list of HTTP multipart parts parsed in + [http_body]. + + The returned parts list is keyed to a form field name so that one can do: + + {[ + let parts_kv = parts boundary http_body in + match List.assoc_opt "field1" parts_vk with + | Some v -> ... + | None -> .. + ]} *) + +(** {1 Part header} *) val name : part_header -> string (** [name t] returns the form field name. *) @@ -87,7 +118,7 @@ val filename : part_header -> string option val find : string -> part_header -> string option (** [find name t] returns the multipart parameter value associated with [name]. *) -(** {2 Pretty Printers} *) +(** {3 Pretty Printers} *) val pp_part_header : Format.formatter -> part_header -> unit val pp_read_result : Format.formatter -> read -> unit diff --git a/test/test.ml b/test/test.ml index 8e0472d..522d0b6 100644 --- a/test/test.ml +++ b/test/test.ml @@ -114,3 +114,91 @@ asdfasdfasdfasdfasdfasdf|} filename: binary Body: 4, a\207\137b Body_end |}] + +type parts_result = + ( (string * (Http_multipart_formdata.part_header * string)) list + , string ) + result +[@@deriving show] + +let%expect_test "parse_parts" = + let body = + String.concat "\r\n" + [ {||} + ; {| this is a preamble text. |} + ; {|-----------------------------735323031399963166993862150|} + ; {|Content-Disposition: form-data; name="text1"|} + ; {||} + ; {|text default|} + ; {|-----------------------------735323031399963166993862150|} + ; {|Content-Disposition: form-data; name="text2"|} + ; {||} + ; {|aωb|} + ; {|-----------------------------735323031399963166993862150|} + ; {|Content-Disposition: form-data; name="file1"; filename="a.txt"|} + ; {|Content-Type: text/plain|} + ; {||} + ; {|Content of a.txt.|} + ; {||} + ; {|-----------------------------735323031399963166993862150|} + ; {|Content-Disposition: form-data; name="file2"; filename="a.html"|} + ; {|Content-Type: text/html|} + ; {||} + ; {|Content of a.html.
thiasdasdf asdfiasdf asdf asdf as df asdf asdf as df asdf asd fa sdf asd fas df asdf as df asd fas df asdf as df asdfas df asd fa sdf as dfa sdf asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfsadfsadfasdfasdfasdfasdfasdfsadfasdfasdfasdfasdfasdfsadfasdfasdfasdfasdfasdfasdfasdfasdfasdf +asdfasdfasdfasdfasdfasdf|} + ; {||} + ; {|-----------------------------735323031399963166993862150|} + ; {|Content-Disposition: form-data; name="file3"; filename="binary"|} + ; {|Content-Type: application/octet-stream|} + ; {||} + ; {|aωb|} + ; {||} + ; {|-----------------------------735323031399963166993862150|} + ; {|Content-Disposition: form-data; name="file3"; filename="binary"; param1=value1; param2=value2|} + ; {|Content-Type: application/octet-stream|} + ; {||} + ; {|aωb|} + ; {|-----------------------------735323031399963166993862150--|} ] + in + let boundary = + Http_multipart_formdata.boundary + "multipart/form-data; \ + boundary=---------------------------735323031399963166993862150" + |> Result.get_ok + in + let parts = Http_multipart_formdata.parts boundary body in + pp_parts_result Format.std_formatter parts ; + [%expect + {| + (Ok [("text1", + (name: text1; + parameters: ; + content_type: text/plain; + filename: , "text default")); + ("text2", + (name: text2; + parameters: ; + content_type: text/plain; + filename: , "a\207\137b")); + ("file1", + (name: file1; + parameters: ; + content_type: text/plain; + filename: a.txt, "Content of a.txt.\r\n")); + ("file2", + (name: file2; + parameters: ; + content_type: text/html; + filename: a.html, + "Content of a.html.
thiasdasdf asdfiasdf asdf asdf as df asdf asdf as df asdf asd fa sdf asd fas df asdf as df asd fas df asdf as df asdfas df asd fa sdf as dfa sdf asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfsadfsadfasdfasdfasdfasdfasdfsadfasdfasdfasdfasdfasdfsadfasdfasdfasdfasdfasdfasdfasdfasdfasdf\nasdfasdfasdfasdfasdfasdf\r\n")); + ("file3", + (name: file3; + parameters: ; + content_type: application/octet-stream; + filename: binary, "a\207\137b\r\n")); + ("file3", + (name: file3; + parameters: (param1, value1); (param2, value2); + content_type: application/octet-stream; + filename: binary, "a\207\137b")) + ]) |}]