-
Notifications
You must be signed in to change notification settings - Fork 0
/
zimcat.go
65 lines (52 loc) · 1.06 KB
/
zimcat.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package main
import (
"flag"
"fmt"
"log"
"os"
"regexp"
zim "github.com/akhenakh/gozim"
"jaytaylor.com/html2text"
)
// z is a package-level handle for a ZIM reader. Nothing in this file assigns
// it: main works with its own local reader, so z keeps its nil zero value here.
var z *zim.ZimReader
// main writes the plain-text rendering of the articles in a ZIM archive to
// standard output.
//
// The first positional command-line argument is the path of the ZIM file; any
// further arguments are ignored. Without a path, a usage line is written to
// standard error and the process exits with status 1.
//
// For every entry in namespace 'A' whose data and converted text are both
// non-empty, the output is the entry title, a blank line, and the HTML body
// converted to text with the Wikipedia licence notice removed. Entries that
// cannot be looked up, or that are marked deleted, are skipped; a failure to
// read or convert an entry aborts the program.
func main() {
	flag.Parse()
	if flag.NArg() < 1 {
		// The builtin print is a bootstrapping helper that is not guaranteed
		// to stay in the language and emits no newline; write a proper usage
		// line to standard error instead.
		fmt.Fprintf(os.Stderr, "usage: %s <file.zim>\n", os.Args[0])
		os.Exit(1)
	}
	path := flag.Arg(0)

	// Deliberately not named z: `z, err := ...` here would declare a new local
	// that shadows the package-level z instead of assigning it.
	reader, err := zim.NewReader(path, false)
	if err != nil {
		log.Fatalf("opening %q: %v", path, err)
	}

	// Matches the licence notice together with whatever surrounds it on the
	// same line. `.` does not match a newline, so the notice is only stripped
	// when it sits on a single line of the converted text.
	footer := regexp.MustCompile(`(.*)This article is issued from Wikipedia.*Additional terms may apply for the media files.(.*)`)

	reader.ListTitlesPtrIterator(func(idx uint32) {
		a, err := reader.ArticleAtURLIdx(idx)
		// Best effort: entries that fail to load or are deleted are skipped
		// rather than aborting the whole dump.
		if err != nil || a.EntryType == zim.DeletedEntry {
			return
		}
		// Only namespace 'A' is dumped (presumably the article namespace —
		// confirm against the ZIM format documentation).
		if a.Namespace != 'A' {
			return
		}

		data, err := a.Data()
		if err != nil {
			log.Fatalf("reading article %q: %v", a.Title, err)
		}
		if len(data) == 0 {
			return
		}

		text, err := html2text.FromString(string(data), html2text.Options{PrettyTables: false})
		if err != nil {
			log.Fatalf("converting article %q to text: %v", a.Title, err)
		}
		if len(text) == 0 {
			return
		}

		// Keep only the text around the notice (groups 1 and 2).
		fmt.Print(a.Title + "\n\n" + footer.ReplaceAllString(text, "$1$2"))
	})
}