From 098407f5d50b96b8bbb94bc9c03a8ba7bde073ad Mon Sep 17 00:00:00 2001 From: Tong Sun Date: Wed, 3 May 2023 12:35:08 -0400 Subject: [PATCH] - [+] implement excluding control. close #6 --- html2md_test.go | 25 +++++++++++++++++++++++++ prop_html2md.go | 11 +++++++++++ 2 files changed, 36 insertions(+) diff --git a/html2md_test.go b/html2md_test.go index c8e6507..cba8f44 100644 --- a/html2md_test.go +++ b/html2md_test.go @@ -12,6 +12,17 @@ const ( boldText = "Bold Text" boldEscape = "option src_ip" + + excludingControlTest = `
+

+
+

Some heading i don't need

+
+ The string I need + Some value I don't need + +

+
` ) type testCase struct { @@ -45,6 +56,20 @@ func TestExec(t *testing.T) { "Only blue ones left", []string{"-i", "--plugin-strikethrough"}, }, + { + "ExclChildren", "The string I need", excludingControlTest, + []string{"-s", "h1", "--xc", "-i"}, + }, + { + "Excl1", `### Some heading i don't need + + The string I need`, excludingControlTest, + []string{"-s", "h1", "-xspan", "-i"}, + }, + { + "Excl2", "The string I need", excludingControlTest, + []string{"-s", "h1", "-x", "span", "-xdiv", "-i"}, + }, // { // "", "", "", []string{"-i"}, // }, diff --git a/prop_html2md.go b/prop_html2md.go index b32f90f..26d4632 100644 --- a/prop_html2md.go +++ b/prop_html2md.go @@ -35,6 +35,17 @@ func Html2md(ctx *cli.Context) error { doc, err := goquery.NewDocumentFromReader(rootArgv.Filei) clis.AbortOn("Reading file with goquery", err) content := doc.Find(rootArgv.Sel) + if rootArgv.ExclChildren { + content = content.Children().Remove().End() + // h, _ := goquery.OuterHtml(content) + // clis.Verbose(3, "%#v\n", h) + } else if len(rootArgv.Excl) != 0 { + for _, ex := range rootArgv.Excl { + content = content.Find(ex).Remove().End() + h, _ := content.Html() + clis.Verbose(5, "%#v\n", h) + } + } domain, url := rootArgv.Domain, rootArgv.Filei.Name() if domain == "" && regexp.MustCompile(`(?i)^http`).MatchString(url) {