Skip to content

Commit

Permalink
Use official Pug parser as fallback
Browse files Browse the repository at this point in the history
  • Loading branch information
SGrondin committed Sep 17, 2022
1 parent 788dbaf commit f893f71
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 120 deletions.
86 changes: 53 additions & 33 deletions src/cli/strings.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ open! Core
open Lwt.Infix
open Lwt.Syntax

let version = "2.1.0"
let version = "2.2.0"

let header = sprintf "/* Generated by okTurtles/strings v%s */\n\n" version

Expand Down Expand Up @@ -35,7 +35,7 @@ type counts = {
type common_options = {
targets: string list;
template_script: Vue.template_script;
fast_pug: bool;
slow_pug: bool;
}

type action =
Expand All @@ -61,7 +61,7 @@ let process_file ~root table count filename template_script ~f:get_collector : u
Queue.iter collector.strings ~f:handler;
Vue.collect_from_possible_scripts collector template_script ~on_string:handler)

let rec traverse ~root counts strings ({ template_script; fast_pug; _ } as options) directory =
let rec traverse ~root counts strings ({ template_script; slow_pug; _ } as options) directory =
let* entries =
Lwt_pool.use pool (fun () -> Lwt_unix.files_of_directory directory |> Lwt_stream.to_list)
in
Expand All @@ -88,25 +88,35 @@ let rec traverse ~root counts strings ({ template_script; fast_pug; _ } as optio
| { st_kind = S_REG; _ }, (lazy ".vue"), _ ->
process_file ~root strings counts.vue path template_script ~f:(fun ic ->
let collector = Utils.Collector.create ~path in
let* languages = Vue.parse ~path ~fast_pug ic in
let* languages = Vue.parse ~path ~slow_pug ic in
let+ () = Vue.collect_from_languages collector languages in
collector)
| { st_kind = S_REG; _ }, (lazy ".pug"), _ ->
process_file ~root strings counts.pug path template_script ~f:(fun ic ->
let collector = Utils.Collector.create ~path in
let+ parsed =
Parsing.Basic.exec_parser_lwt Parsing.Pug.parser ~path ~language_name:"Pug" ic
let* source = Lwt_io.read ic in
let slow_parse () = Quickjs.extract_to_collector collector Pug source in
let+ () =
match slow_pug with
| true -> slow_parse ()
| false ->
let on_ok parsed =
Parsing.Pug.collect collector parsed;
Lwt.return_unit
in
let on_error ~msg:_ = slow_parse () in
Parsing.Basic.exec_parser ~on_ok ~on_error Parsing.Pug.parser ~path ~language_name:"Pug"
source
in
Parsing.Pug.collect collector parsed;
collector)
| { st_kind = S_REG; _ }, _, _ when String.is_suffix filename ~suffix:".html" ->
process_file ~root strings counts.html path template_script ~f:(fun ic ->
let collector = Utils.Collector.create ~path in
let+ parsed =
Parsing.Basic.exec_parser_lwt Parsing.Html.parser ~path ~language_name:"HTML" ic
let on_ok parsed =
Parsing.Html.collect collector parsed;
Lwt.return collector
in
Parsing.Html.collect collector parsed;
collector)
Parsing.Basic.exec_parser_lwt ~on_ok Parsing.Html.parser ~path ~language_name:"HTML" ic)
| { st_kind = S_DIR; _ }, _, _ -> traverse ~root counts strings options path
| _ -> Lwt.return_unit))
entries
Expand Down Expand Up @@ -224,33 +234,39 @@ let handle_system_failure = function
exit 1
| exn -> raise exn

let main ({ targets; template_script; fast_pug } as options) = function
let main ({ targets; template_script; slow_pug } as options) = function
| Debug lang ->
Lwt_list.iter_s
(fun path ->
let* () = Lwt_io.printlf "\n>>> Debugging [%s]" path in
Lwt_io.with_file ~flags:read_flags ~mode:Input path (fun ic ->
match lang, String.slice path (-4) 0 with
| _, ".vue" ->
let* languages = Vue.parse ~path ~fast_pug ic in
let* languages = Vue.parse ~path ~slow_pug ic in
Vue.debug_template ~path languages template_script lang
| Pug, ".pug" when fast_pug ->
let* parsed =
Parsing.Basic.exec_parser_lwt Parsing.Pug.parser ~path ~language_name:"Pug" ic
in
Vue.debug_template ~path [ Pug_native { parsed; length = None } ] template_script lang
| Pug, ".pug" ->
| Pug, ".pug" -> (
let* source = Lwt_io.read ic in
let collector = Utils.Collector.create ~path in
let* () = Quickjs.extract_to_collector collector Pug source in
Vue.debug_template ~path
[ Pug { collector; length = String.length source } ]
template_script lang
let slow_parse () =
let collector = Utils.Collector.create ~path in
let* () = Quickjs.extract_to_collector collector Pug source in
Vue.debug_template ~path
[ Pug { collector; length = String.length source } ]
template_script lang
in
match slow_pug with
| true -> slow_parse ()
| false ->
let on_ok parsed =
Vue.debug_template ~path [ Pug_native { parsed; length = None } ] template_script lang
in
let on_error ~msg:_ = slow_parse () in
Parsing.Basic.exec_parser ~on_ok ~on_error Parsing.Pug.parser ~path ~language_name:"Pug"
source)
| Html, _ when String.is_suffix path ~suffix:".html" ->
let* parsed =
Parsing.Basic.exec_parser_lwt Parsing.Html.parser ~path ~language_name:"Pug" ic
let on_ok parsed =
Vue.debug_template ~path [ Html { parsed; length = None } ] template_script lang
in
Vue.debug_template ~path [ Html { parsed; length = None } ] template_script lang
(* This arm runs the HTML parser, so parse failures must be reported as HTML
   (matching the ".html" case in [traverse]) rather than as Pug. *)
Parsing.Basic.exec_parser_lwt ~on_ok Parsing.Html.parser ~path ~language_name:"HTML" ic
| _ -> Lwt_io.printlf "Nothing to do for file [%s]" path))
targets
| Run ->
Expand Down Expand Up @@ -320,12 +336,16 @@ let () =
let open Command.Let_syntax in
let common =
let%map_open targets = Param.("path" %: string |> sequence |> anon)
and use_ts = flag "--ts" ~full_flag_required:() no_arg ~doc:"Interpret Vue templates as TypeScript"
and fast_pug =
flag "--fast-pug" ~aliases:[ "--fp" ] ~full_flag_required:() no_arg
~doc:"Use the native Pug parser. Much faster but not every Pug feature is supported."
and use_ts =
flag "--ts" ~full_flag_required:() no_arg
~doc:"Interpret HTML/Pug templates as containing TypeScript"
and slow_pug =
flag "--slow-pug" ~aliases:[ "--sp" ] ~full_flag_required:() no_arg
~doc:
"Use the official Pug parser. Much slower, especially on large files. Use this option if any \
translation seems to be missing from a Pug file, and report the bug if this option fixes it."
in
{ targets; template_script = (if use_ts then TS else JS); fast_pug }
{ targets; template_script = (if use_ts then TS else JS); slow_pug }
in
let action =
let open Param in
Expand All @@ -346,5 +366,5 @@ let () =
>>| (fun (common, action) () ->
let program () = main common action in
Lwt_main.run (Lwt.catch program handle_system_failure))
|> Command.basic ~summary:"Extract i18n strings - https://github.com/okTurtles/strings"
|> basic ~summary:"Extract i18n strings - https://github.com/okTurtles/strings"
|> Command_unix.run ~version
42 changes: 27 additions & 15 deletions src/cli/vue.ml
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,26 @@ module Language = struct
length: int;
}
| Css of int
| Failed of string

let of_source ~path ~fast_pug : Source.t -> t Lwt.t = function
let of_source ~path ~slow_pug : Source.t -> t Lwt.t = function
| Template (Template.HTML source) ->
let parsed = Parsing.Basic.exec_parser Parsing.Html.parser ~path ~language_name:"HTML" source in
Html { parsed; length = Some (String.length source) } |> Lwt.return
| Template (Template.PUG source) when fast_pug ->
let parsed = Basic.exec_parser Pug.parser ~path ~language_name:"Pug" source in
Pug_native { parsed; length = Some (String.length source) } |> Lwt.return
| Template (Template.PUG source) ->
let collector = Utils.Collector.create ~path in
let+ () = Quickjs.extract_to_collector collector Pug source in
Pug { collector; length = String.length source }
let on_ok parsed = Html { parsed; length = Some (String.length source) } in
let on_error ~msg = Failed msg in
Parsing.Basic.exec_parser ~on_ok ~on_error Parsing.Html.parser ~path ~language_name:"HTML" source
|> Lwt.return
| Template (Template.PUG source) -> (
let slow_parse () =
let collector = Utils.Collector.create ~path in
let+ () = Quickjs.extract_to_collector collector Pug source in
Pug { collector; length = String.length source }
in
match slow_pug with
| true -> slow_parse ()
| false ->
let on_ok parsed = Pug_native { parsed; length = Some (String.length source) } |> Lwt.return in
let on_error ~msg:_ = slow_parse () in
Basic.exec_parser ~on_ok ~on_error Pug.parser ~path ~language_name:"Pug" source)
| Script (Script.JS s) -> Js s |> Lwt.return
| Script (Script.TS s) -> Ts s |> Lwt.return
| Style (Style.CSS s) -> Css (String.length s) |> Lwt.return
Expand Down Expand Up @@ -82,7 +90,10 @@ let collect_from_languages collector languages =
Js.extract_to_collector collector source;
Lwt.return_unit
| Ts source -> Quickjs.extract_to_collector collector Typescript source
| Css _ -> Lwt.return_unit)
| Css _ -> Lwt.return_unit
| Failed msg ->
Queue.enqueue collector.file_errors msg;
Lwt.return_unit)
languages

let debug_template ~path languages template_script target =
Expand Down Expand Up @@ -119,10 +130,11 @@ let debug_template ~path languages template_script target =
| Html { length = None; _ }, Pug -> Lwt_io.printl "<HTML code>"
| Pug_native { length = Some len; _ }, Html -> Lwt_io.printlf "<Pug code - %d bytes>" len
| Pug_native { length = None; _ }, Html -> Lwt_io.printl "<Pug code>"
| Pug { length; _ }, Html -> Lwt_io.printlf "<Pug code - %d bytes>" length)
| Pug { length; _ }, Html -> Lwt_io.printlf "<Pug code - %d bytes>" length
| Failed msg, _ -> Lwt_io.printlf "❌ Parsing error in path: %s" msg)
languages

let parse ~path ~fast_pug ic =
let parse ~path ~slow_pug ic =
let buf = Buffer.create 256 in
let parser =
let open Angstrom in
Expand All @@ -137,5 +149,5 @@ let parse ~path ~fast_pug ic =
in
mlws *> sep_by mlws languages <* mlws
in
Basic.exec_parser_lwt parser ~path ~language_name:"Vue" ic
>>= Lwt_list.map_p (Language.of_source ~path ~fast_pug)
let on_ok ll = Lwt_list.map_p (Language.of_source ~path ~slow_pug) ll in
Basic.exec_parser_lwt ~on_ok parser ~path ~language_name:"Vue" ic
37 changes: 21 additions & 16 deletions src/parsing/basic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ let is_identifier = function
| 'a' .. 'z'
|'A' .. 'Z'
|'0' .. '9'
|'_' ->
|'_'
|'-' ->
true
| _ -> false

Expand Down Expand Up @@ -108,32 +109,36 @@ let block_parser (starts, ends) buf ~f =
<* many_till line ends
>>| fun x -> f (Buffer.contents buf) x

let default_error_message ~path ~language_name ~unparsed =
sprintf
let default_error_handler ~path ~language_name ~unparsed =
failwithf
"The file [%s] contains invalid syntax or %s features unsupported by this tool.\n\
Please report this so it can be improved.\n\
The unsupported syntax starts at:\n\
%s"
path language_name
(Yojson.Basic.to_string (`String (String.slice unparsed 0 Int.(min 20 (String.length unparsed)))))
()

let default_syntax_error ~path ~language_name ~err =
let default_syntax_error_handler ~path ~language_name ~msg =
failwithf
"The file [%s] contains invalid syntax or %s features unsupported by this tool.\n\
If you are certain the syntax is valid, then please report this error.\n\
Error: %s" path language_name err ()
Error: %s" path language_name msg ()

let exec_parser parser ~path ~language_name raw =
let result = Angstrom.parse_string ~consume:All parser raw in
match result with
| Ok parsed -> parsed
| Error err -> default_syntax_error ~path ~language_name ~err
(* Runs [parser] over the whole string [raw] (all input must be consumed).
   - On success the parsed value is handed to [on_ok] and its result is returned.
   - On failure the Angstrom message is handed to [on_error] when one was given;
     otherwise [default_syntax_error_handler] is called with [path] and
     [language_name], which raises via [failwithf]. *)
let exec_parser ~on_ok ?on_error parser ~path ~language_name raw =
  match Angstrom.parse_string ~consume:All parser raw, on_error with
  | Ok parsed, _ -> on_ok parsed
  | Error msg, Some handler -> handler ~msg
  | Error msg, None -> default_syntax_error_handler ~path ~language_name ~msg

let exec_parser_lwt ?(error_message = default_error_message) parser ~path ~language_name ic =
let exec_parser_lwt ~on_ok ?on_error parser ~path ~language_name ic =
let open Lwt.Infix in
Angstrom_lwt_unix.parse parser ic >|= function
| Angstrom.Buffered.{ len = 0; _ }, Ok parsed -> parsed
| Angstrom.Buffered.{ buf; off; len }, Ok _ ->
Angstrom_lwt_unix.parse parser ic >>= function
| Angstrom.Buffered.{ len = 0; _ }, Ok x -> on_ok x
| Angstrom.Buffered.{ buf; off; len }, result -> (
let unparsed = Bigstringaf.substring buf ~off ~len in
failwith (error_message ~path ~language_name ~unparsed)
| _, Error err -> default_syntax_error ~path ~language_name ~err
match on_error with
| None -> default_error_handler ~path ~language_name ~unparsed
| Some handler -> handler ~unparsed (Result.ok result))
58 changes: 30 additions & 28 deletions src/parsing/js.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
open! Core
open Utils

let errors_to_string errors =
let buf = Buffer.create 128 in
Expand All @@ -11,14 +10,10 @@ let errors_to_string errors =
bprintf buf "Line %d (%d) to line %d (%d): %s\n" sl sc el ec (Parse_error.PP.error err));
Buffer.contents buf

let parse_error ({ file_errors; _ } : Collector.t) error =
let message =
match error with
| First (_ :: _ :: _ as ll) -> errors_to_string ll
| First ll -> errors_to_string ll
| Second msg -> msg
in
Queue.enqueue file_errors message
(* Turns a parse failure into a printable message.
   [First errors] is a list of parser errors rendered with [errors_to_string];
   [Second message] is an already formatted message returned as is. *)
let parse_error failure =
  match failure with
  | Second message -> message
  | First errors -> errors_to_string errors

let debug statements =
sprintf "Statements: %s"
Expand All @@ -27,28 +22,35 @@ let debug statements =
|> String.concat ~sep:", ")
|> print_endline

let extract source ~on_string =
match Parser_flow.program source with
| _, _ :: _ -> ()
| ast, [] -> (
match ast with
| _, Flow_ast.Program.{ statements; comments = _; all_comments = _ } ->
(* debug statements; *)
Js_ast.extract ~on_string statements)
| exception _ -> ()

(* Options handed to [Parser_flow.program]: the parser's defaults with
   [esproposal_export_star_as] switched on.
   NOTE(review): presumably this accepts the `export * as ns from "..."` syntax;
   confirm against Flow's [Parser_env] documentation. *)
let parse_options = Some { Parser_env.default_parse_options with esproposal_export_star_as = true }

let extract_to_collector (collector : Utils.Collector.t) source =
let parse ~path source =
match Parser_flow.program ~parse_options source with
| _, (_ :: _ as errors) -> failwith (errors_to_string errors)
| _, (_ :: _ as errors) -> Error (lazy (errors_to_string errors))
| ast, [] -> (
match ast with
| _, Flow_ast.Program.{ statements; comments = _; all_comments = _ } ->
(* debug statements; *)
Js_ast.extract ~on_string:(Queue.enqueue collector.strings) statements)
| exception Parse_error.Error (_, (_ :: _ as errors)) -> parse_error collector (First errors)
| exception Parse_error.Error (_, []) -> parse_error collector (Second "Syntax error")
| _, Flow_ast.Program.{ statements; comments = _; all_comments = _ } -> Ok statements)
| exception Parse_error.Error (_, (_ :: _ as errors)) -> Error (lazy (parse_error (First errors)))
| exception Parse_error.Error (_, []) -> Error (lazy (parse_error (Second "Syntax error")))
| exception exn ->
print_endline (sprintf "Unexpected error in %s\nPlease report this bug." collector.path);
raise exn
Error
(lazy
(sprintf "Unexpected error in %s: %s\nPlease report this bug." path (Utils.Exception.human exn)))

(* Interprets [source] as a JS program (parsed under the placeholder path
   "attribute"). When [Js_ast.unescape] recognises the parsed statements, the
   string it yields is returned. In every other case, including a parse
   failure, [source] is returned unchanged. *)
let unescape source =
  parse ~path:"attribute" source
  |> Result.ok
  |> Option.bind ~f:Js_ast.unescape
  |> Option.value ~default:source

(* Parses [source] (under the placeholder path "attribute") and, if parsing
   succeeds, passes the statements to [Js_ast.extract] with [on_string].
   Parse failures are deliberately ignored: nothing is reported. *)
let extract source ~on_string =
  parse ~path:"attribute" source |> Result.iter ~f:(Js_ast.extract ~on_string)

(* Parses [source] using the collector's own path for error reporting.
   On success the statements go to [Js_ast.extract], which enqueues strings
   onto [collector.strings]; on failure the lazily built error message is
   forced and enqueued onto [collector.file_errors]. *)
let extract_to_collector (collector : Utils.Collector.t) source =
  match parse ~path:collector.path source with
  | Error message -> Queue.enqueue collector.file_errors (Lazy.force message)
  | Ok statements -> Js_ast.extract ~on_string:(Queue.enqueue collector.strings) statements
13 changes: 13 additions & 0 deletions src/parsing/js_ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -446,3 +446,16 @@ let extract ~on_string stmts =
in

List.iter stmts ~f:extract_statement

(* Returns [Some str] when [stmts] is exactly one expression statement whose
   expression is the string literal [str]; returns [None] for any other
   statement list. Every record field is listed explicitly so that a new field
   in the AST is flagged by the compiler instead of being silently ignored. *)
let unescape stmts =
  match stmts with
  | [
   ( _,
     Statement.Expression
       {
         expression = _, Literal { value = String str; raw = _; comments = _ };
         directive = _;
         comments = _;
       } );
  ] ->
    Some str
  | _ -> None
Loading

0 comments on commit f893f71

Please sign in to comment.