Skip to content

Commit

Permalink
Parse .html files
Browse files Browse the repository at this point in the history
  • Loading branch information
SGrondin committed Sep 7, 2022
1 parent 176aafe commit 2a0da92
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 121 deletions.
74 changes: 56 additions & 18 deletions src/cli/strings.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ let write_flags = Core_unix.[ O_WRONLY; O_NONBLOCK; O_TRUNC; O_CREAT ]

type counts = {
vue: int ref;
pug: int ref;
html: int ref;
js: int ref;
ts: int ref;
}
Expand All @@ -40,33 +42,54 @@ let process_file ~root strings count filename ~f:get_iter =
| None -> String.Set.add String.Set.empty data
| Some set -> String.Set.add set data)))

let rec traverse ~root counts strings js_file_errors directory =
let rec traverse ~root counts strings js_file_errors template_script directory =
let* entries =
Lwt_pool.use pool (fun () -> Lwt_unix.files_of_directory directory |> Lwt_stream.to_list)
in
Lwt_list.iter_p
(function
| filename when String.is_prefix ~prefix:"." filename || String.( = ) filename "node_modules" ->
Lwt.return_unit
| "node_modules" -> Lwt.return_unit
| filename when String.is_prefix ~prefix:"." filename -> Lwt.return_unit
| filename -> (
let path = sprintf "%s/%s" directory filename in
Lwt_unix.lstat path >>= function
| { st_kind = S_REG; _ } when String.is_suffix ~suffix:".vue" filename ->
process_file ~root strings counts.vue path ~f:(fun ic ->
let* languages = Vue.parse ~filename ic in
Vue.extract_strings ~filename js_file_errors languages JS)
| { st_kind = S_REG; _ } when String.is_suffix ~suffix:".js" filename ->
Lwt_unix.lstat path >>= fun stat ->
match stat, lazy (String.slice filename (-4) 0), lazy (String.slice filename (-3) 0) with
| { st_kind = S_REG; _ }, _, (lazy ".js") when String.is_suffix ~suffix:".js" filename ->
process_file ~root strings counts.js path ~f:(fun ic ->
let* source = Lwt_io.read ic in
let parsed = Queue.create () in
let+ () = Parsing.Js_ast.strings_from_js ~filename:path parsed js_file_errors source in
Queue.iter parsed)
| { st_kind = S_REG; _ } when String.is_suffix ~suffix:".ts" filename ->
| { st_kind = S_REG; _ }, _, (lazy ".ts") ->
process_file ~root strings counts.ts path ~f:(fun ic ->
let* source = Lwt_io.read ic in
let+ parsed = Quickjs.extract_ts source in
Array.iter parsed)
| { st_kind = S_DIR; _ } -> traverse ~root counts strings js_file_errors path
| { st_kind = S_REG; _ }, (lazy ".vue"), _ ->
process_file ~root strings counts.vue path ~f:(fun ic ->
let* languages = Vue.parse ~filename ic in
Vue.extract_strings ~filename js_file_errors template_script languages)
| { st_kind = S_REG; _ }, (lazy ".pug"), _ ->
process_file ~root strings counts.pug path ~f:(fun ic ->
let* nodes =
Parsing.Basic.exec_parser_lwt Parsing.Pug.parser ~filename ~language_name:"Pug" ic
in
let parsed = Queue.create () in
let+ () = Vue.collect_pug parsed template_script nodes in
Queue.iter parsed)
| { st_kind = S_REG; _ }, _, _ when String.is_suffix filename ~suffix:".html" ->
process_file ~root strings counts.html path ~f:(fun ic ->
let* nodes =
Parsing.Basic.exec_parser_lwt Parsing.Html.parser ~filename ~language_name:"HTML" ic
in
let parsed = Queue.create () in
let+ () =
Parsing.Html.finalize ~filename nodes
|> Option.value_map ~default:Lwt.return_unit ~f:(Vue.collect_html parsed template_script)
in
Queue.iter parsed)
| { st_kind = S_DIR; _ }, _, _ ->
traverse ~root counts strings js_file_errors template_script path
| _ -> Lwt.return_unit))
entries

Expand Down Expand Up @@ -196,10 +219,24 @@ let main { targets; template_script } = function
| Debug lang ->
Lwt_list.iter_s
(fun filename ->
let* () = Lwt_io.printlf "Debugging %s" filename in
let* () = Lwt_io.printlf "Debugging [%s]" filename in
Lwt_io.with_file ~flags:read_flags ~mode:Input filename (fun ic ->
let* languages = Vue.parse ~filename ic in
Vue.debug_template ~filename languages lang template_script))
match lang, String.slice filename (-4) 0 with
| _, ".vue" ->
let* languages = Vue.parse ~filename ic in
Vue.debug_template ~filename languages template_script lang
| Pug, ".pug" ->
let* nodes =
Parsing.Basic.exec_parser_lwt Parsing.Pug.parser ~filename ~language_name:"Pug" ic
in
Vue.debug_template ~filename [ Pug { nodes; length = None } ] template_script lang
| Html, _ when String.is_suffix filename ~suffix:".html" ->
let* top =
Parsing.Basic.exec_parser_lwt Parsing.Html.parser ~filename ~language_name:"Pug" ic
>|= Parsing.Html.finalize ~filename
in
Vue.debug_template ~filename [ Html { top; length = None } ] template_script lang
| _ -> Lwt_io.printlf "Nothing to do for file [%s]" filename))
targets
| Run ->
let overall_time = Utils.time () in
Expand All @@ -220,13 +257,13 @@ let main { targets; template_script } = function
let js_file_errors = Queue.create () in
let* english =
let english_list = String.Table.create () in
let counts = { vue = ref 0; js = ref 0; ts = ref 0 } in
let counts = { vue = ref 0; pug = ref 0; html = ref 0; js = ref 0; ts = ref 0 } in
let time = Utils.time () in
let* () =
Lwt_list.iter_p
(fun directory ->
let root = String.chop_suffix ~suffix:"/" directory |> Option.value ~default:directory in
traverse ~root:(sprintf "%s/" root) counts english_list js_file_errors root)
traverse ~root:(sprintf "%s/" root) counts english_list js_file_errors template_script root)
targets
in
let english =
Expand All @@ -237,8 +274,9 @@ let main { targets; template_script } = function
let f ext i = sprintf "%d %s file%s" i ext (plural i) in
let time = Int63.(time () - !Quickjs.init_time) in
Lwt_io.printlf
!"✅ [%{Int63}ms] Processed %s, %s, and %s"
time (f ".vue" !(counts.vue)) (f ".js" !(counts.js)) (f ".ts" !(counts.ts))
!"✅ [%{Int63}ms] Processed %s, %s, %s, %s, and %s"
time (f ".js" !(counts.js)) (f ".ts" !(counts.ts)) (f ".html" !(counts.html))
(f ".vue" !(counts.vue)) (f ".pug" !(counts.pug))
in
let+ () = write_english english in
english
Expand Down
150 changes: 68 additions & 82 deletions src/cli/vue.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
open Core
open Lwt.Syntax
open Lwt.Infix
open Parsing

module Source = struct
Expand All @@ -15,29 +16,24 @@ module Language = struct
| Ts of string
| Html of {
top: SZXX.Xml.DOM.element option;
length: int;
length: int option;
}
| Pug of {
nodes: Pug.node array;
length: int;
length: int option;
}
| Css of int

let of_source ~filename : Source.t -> t = function
| Template (Template.HTML source) ->
let open SZXX in
let Xml.SAX.To_DOM.{ top; _ } =
Angstrom.parse_string ~consume:All (Angstrom.many Xml.parser) source
|> Result.map_error ~f:(sprintf "Parsing error in %s: %s" filename)
|> Result.ok_or_failwith
|> List.fold_result ~init:Xml.SAX.To_DOM.init ~f:(fun acc x -> Xml.SAX.To_DOM.folder (Ok acc) x)
|> Result.map_error ~f:(sprintf "Syntax error in %s: %s" filename)
|> Result.ok_or_failwith
let top =
Parsing.Basic.exec_parser Parsing.Html.parser ~filename ~language_name:"HTML" source
|> Parsing.Html.finalize ~filename
in
Html { top; length = String.length source }
Html { top; length = Some (String.length source) }
| Template (Template.PUG source) ->
let nodes = Basic.exec_parser Pug.parser ~filename ~language_name:"Pug" source in
Pug { nodes; length = String.length source }
Pug { nodes; length = Some (String.length source) }
| Script (Script.JS s) -> Js s
| Script (Script.TS s) -> Ts s
| Style (Style.CSS s) -> Css (String.length s)
Expand All @@ -53,73 +49,74 @@ module Debug = struct
| Html
end

let rec collect_pug strings acc Pug.{ selector; arguments; text; children } =
let acc =
match text, selector with
| Some s, Element { parts = "i18n" :: _ } ->
Queue.enqueue strings s;
acc
| Some s, _ -> s :: acc
| None, _ -> acc
in
let acc =
List.fold arguments ~init:acc ~f:(fun acc -> function
| Pug.{ contents = None; _ } -> acc
| Pug.{ contents = Some s; _ } -> s :: acc)
in
Array.fold children ~init:acc ~f:(collect_pug strings)

let rec collect_html strings acc (node : SZXX.Xml.DOM.element) =
let acc =
match node with
| { text = ""; _ } -> acc
| { tag = "i18n"; text; _ } ->
Queue.enqueue strings text;
acc
| { text; _ } -> text :: acc
in
let acc =
List.fold node.attrs ~init:acc ~f:(fun acc -> function
| "class", _
|"id", _
|_, "" ->
acc
| _, source -> SZXX.Xml.unescape source :: acc)
in
Array.fold node.children ~init:acc ~f:(collect_html strings)

let extract_ts strings source =
let+ parsed = Quickjs.extract_ts source in
Array.iter parsed ~f:(Queue.enqueue strings)

let extract_template strings possible_code = function
| JS ->
List.iter possible_code ~f:(Js_ast.strings_from_template strings);
Lwt.return_unit
| TS -> Lwt_list.iter_p (extract_ts strings) possible_code
let extract_template strings template_script possible_code =
match template_script with
| JS ->
List.iter possible_code ~f:(Js_ast.strings_from_template strings);
Lwt.return_unit
| TS -> Lwt_list.iter_p (extract_ts strings) possible_code

let collect_pug strings template_script nodes =
let rec loop acc Pug.{ selector; arguments; text; children } =
let acc =
match text, selector with
| Some s, Element { parts = "i18n" :: _ } ->
Queue.enqueue strings s;
acc
| Some s, _ -> s :: acc
| None, _ -> acc
in
let acc =
List.fold arguments ~init:acc ~f:(fun acc -> function
| Pug.{ contents = None; _ } -> acc
| Pug.{ contents = Some s; _ } -> s :: acc)
in
Array.fold children ~init:acc ~f:loop
in
Array.fold nodes ~init:[] ~f:loop |> extract_template strings template_script

let collect_html strings template_script top =
let rec loop acc (node : SZXX.Xml.DOM.element) =
let acc =
match node with
| { text = ""; _ } -> acc
| { tag = "i18n"; text; _ } ->
Queue.enqueue strings text;
acc
| { text; _ } -> text :: acc
in
let acc =
List.fold node.attrs ~init:acc ~f:(fun acc -> function
| "class", _
|"id", _
|_, "" ->
acc
| _, source -> SZXX.Xml.unescape source :: acc)
in
Array.fold node.children ~init:acc ~f:loop
in
loop [] top |> extract_template strings template_script

let extract_strings ~filename js_file_errors languages template_script =
let extract_strings ~filename js_file_errors template_script languages =
let strings = Queue.create () in
let+ () =
Lwt_list.iter_p
(function
| Language.Html { top = None; length = _ } -> Lwt.return_unit
| Html { top = Some node; length = _ } ->
let possible_code = collect_html strings [] node in
extract_template strings possible_code template_script
| Pug { nodes; length = _ } ->
let possible_code =
Array.fold nodes ~init:[] ~f:(fun acc node -> collect_pug strings acc node)
in
extract_template strings possible_code template_script
| Html { top = Some node; length = _ } -> collect_html strings template_script node
| Pug { nodes; length = _ } -> collect_pug strings template_script nodes
| Js source -> Js_ast.strings_from_js ~filename strings js_file_errors source
| Ts source -> extract_ts strings source
| Css _ -> Lwt.return_unit)
languages
in
Queue.iter strings

let debug_template ~filename languages target template_script =
let debug_template ~filename languages template_script target =
let print_iter iter =
let buf = Buffer.create 256 in
iter ~f:(fun s ->
Expand All @@ -137,22 +134,23 @@ let debug_template ~filename languages target template_script =
| Css length, _ -> Lwt_io.printlf "<CSS Code - %d bytes>" length
| Html { top; length = _ }, Html ->
let* () = Lwt_io.printlf !"%{sexp#hum: SZXX.Xml.DOM.element option}" top in
let* iter = extract_strings ~filename js_file_errors [ lang ] template_script in
let* iter = extract_strings ~filename js_file_errors template_script [ lang ] in
print_iter iter
| (Pug { nodes; length = _ } as lang), Pug ->
let* () = Lwt_io.printlf !"%{sexp#hum: Pug.nodes}" nodes in
let* iter = extract_strings ~filename js_file_errors [ lang ] template_script in
let* iter = extract_strings ~filename js_file_errors template_script [ lang ] in
print_iter iter
| Html { length; _ }, Pug -> Lwt_io.printlf "<HTML code - %d bytes>" length
| Pug { length; _ }, Html -> Lwt_io.printlf "<Pug code - %d bytes>" length)
| Html { length = Some len; _ }, Pug -> Lwt_io.printlf "<HTML code - %d bytes>" len
| Html { length = None; _ }, Pug -> Lwt_io.printl "<HTML code>"
| Pug { length = Some len; _ }, Html -> Lwt_io.printlf "<Pug code - %d bytes>" len
| Pug { length = None; _ }, Html -> Lwt_io.printl "<Pug code>")
languages
in
Lwt_io.printl
(Queue.to_array js_file_errors |> Array.map ~f:Failed.to_string |> String.concat_array ~sep:"\n")

let parse ~filename ic =
let open Angstrom in
let open Lwt.Syntax in
let open Basic in
let buf = Buffer.create 256 in
let languages =
Expand All @@ -163,18 +161,6 @@ let parse ~filename ic =
(Style.parser buf >>| fun x -> Source.Style x);
]
in
let parser = mlws *> lift2 Tuple2.create (sep_by mlws languages) (mlws *> take_while (fun _ -> true)) in
let+ _unconsumed, result = Angstrom_lwt_unix.parse parser ic in
(match result with
| Ok (parsed, "") -> parsed
| Ok (_, unparsed) ->
failwithf
"The file [%s] contains invalid syntax or Vue features unsupported by this tool.\n\
Please report this so it can be improved.\n\
The unsupported syntax starts at:\n\
%s"
filename
(Yojson.Basic.to_string (`String (String.slice unparsed 0 Int.(min 20 (String.length unparsed)))))
()
| Error err -> failwithf "Syntax Error: %s" err ())
|> List.map ~f:(Language.of_source ~filename)
let parser = mlws *> sep_by mlws languages <* mlws in
Basic.exec_parser_lwt parser ~filename ~language_name:"Vue" ic
>|= List.map ~f:(Language.of_source ~filename)
29 changes: 25 additions & 4 deletions src/parsing/basic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,32 @@ let block_parser (starts, ends) buf ~f =
<* many_till line ends
>>| fun x -> f (Buffer.contents buf) x

let default_error_message ~filename ~language_name ~unparsed =
sprintf
"The file [%s] contains invalid syntax or %s features unsupported by this tool.\n\
Please report this so it can be improved.\n\
The unsupported syntax starts at:\n\
%s"
filename language_name
(Yojson.Basic.to_string (`String (String.slice unparsed 0 Int.(min 20 (String.length unparsed)))))

let default_syntax_error ~filename ~language_name ~err =
failwithf
"The file [%s] contains invalid syntax or %s features unsupported by this tool.\n\
If you are certain the syntax is valid, then please report this error.\n\
Error: %s" filename language_name err ()

let exec_parser parser ~filename ~language_name raw =
let result = Angstrom.parse_string ~consume:All parser raw in
match result with
| Ok parsed -> parsed
| Error err ->
failwithf
"The file [%s] contains invalid syntax or %s features unsupported by this tool.\n\
Please report this so it can be improved. Error: %s" filename language_name err ()
| Error err -> default_syntax_error ~filename ~language_name ~err

let exec_parser_lwt ?(error_message = default_error_message) parser ~filename ~language_name ic =
let open Lwt.Infix in
Angstrom_lwt_unix.parse parser ic >|= function
| Angstrom.Buffered.{ len = 0; _ }, Ok parsed -> parsed
| Angstrom.Buffered.{ buf; off; len }, Ok _ ->
let unparsed = Bigstringaf.substring buf ~off ~len in
failwith (error_message ~filename ~language_name ~unparsed)
| _, Error err -> default_syntax_error ~filename ~language_name ~err
Loading

0 comments on commit 2a0da92

Please sign in to comment.