-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdictionaries.go
122 lines (109 loc) · 2.3 KB
/
dictionaries.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package anonymizer
import (
"bufio"
"fmt"
"os"
"unicode"
"github.com/derekparker/trie/v3"
)
// Dict is a dictionary of words.
//
// It is an alias for a trie whose values are empty structs; loadDict stores
// each word as a key, so only the keys carry information.
type Dict = trie.Trie[struct{}]
const (
// dictDir is the directory that is listed to discover installed word lists
// and that dictionary file names are resolved against.
dictDir = "/usr/share/dict/"
// defaultPath is the word list used when no language is given
// or the requested language is not recognized.
defaultPath = dictDir + "words"
)
// installed is the set of file names found in dictDir.
// It is populated by initInstalled, which runs from init.
var installed map[string]struct{}
// languages maps a short language code to the name of the word-list file
// that findDict looks up under dictDir for that language.
var languages = map[string]string{
"en": "american-english",
"no": "bokmaal",
"bg": "bulgarian",
"ca": "catalan",
"da": "danish",
"nl": "dutch",
"fo": "faroese",
"fr": "french",
"gl": "galician",
"de": "ngerman",
"it": "italian",
"pl": "polish",
"pt": "portuguese",
"es": "spanish",
"sv": "swedish",
"uk": "ukrainian",
}
// init records which dictionaries are installed when the package is loaded.
//
// It panics if the dictionary directory cannot be listed.
func init() {
	if err := initInstalled(); err != nil {
		panic(err)
	}
}
// initInstalled rebuilds the set of installed dictionaries from the entries
// of dictDir.
//
// Every directory entry name is recorded as-is. If the directory cannot be
// read, installed is left as an empty set and an error wrapping the
// underlying cause is returned.
func initInstalled() error {
	// Reset first so lookups see an empty (non-nil) set even when reading fails.
	installed = make(map[string]struct{})
	files, err := os.ReadDir(dictDir)
	if err != nil {
		// %w keeps the cause inspectable with errors.Is / errors.As.
		return fmt.Errorf("open %s dir: %w", dictDir, err)
	}
	for _, file := range files {
		installed[file.Name()] = struct{}{}
	}
	return nil
}
// MustLoadDict is like [LoadDict] but panics if the dictionary cannot be loaded.
//
// The panic value is an error that wraps the one returned by LoadDict,
// so code that recovers it can still inspect the cause.
func MustLoadDict(lang string) *Dict {
	dict, err := LoadDict(lang)
	if err != nil {
		panic(fmt.Errorf("load dictionary: %w", err))
	}
	return dict
}
// LoadDict loads the dictionary for the given language.
//
// lang may be the name of a file in the system dictionary directory or one
// of the known language codes. When lang is empty or matches neither, the
// system default word list is loaded instead. Run
// `sudo select-default-wordlist` to change the system default.
func LoadDict(lang string) (*Dict, error) {
	return loadDict(findDict(lang))
}
// findDict resolves lang to the path of a word-list file.
//
// Resolution order:
//  1. an empty lang yields the default word list;
//  2. a lang equal to an installed file name yields that file;
//  3. a lang that is a known language code yields the file mapped to it
//     (without checking that the file is actually installed);
//  4. anything else yields the default word list.
func findDict(lang string) string {
	if lang == "" {
		return defaultPath
	}
	if _, ok := installed[lang]; ok {
		return dictDir + lang
	}
	if name := languages[lang]; name != "" {
		return dictDir + name
	}
	return defaultPath
}
// loadDict reads the word list at path, one word per line, and returns a
// dictionary containing every line made up solely of lowercase letters.
//
// Lines containing uppercase letters, digits, or symbols are skipped
// (see isLower). An error is returned if the file cannot be opened or if
// reading it fails part-way; in both cases the returned dictionary is nil.
func loadDict(path string) (*Dict, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open %s: %w", path, err)
	}
	defer file.Close()

	dict := trie.New[struct{}]()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		word := scanner.Text()
		if isLower(word) {
			dict.Add(word, struct{}{})
		}
	}
	// Scan returns false on both EOF and failure. Without this check a read
	// error or a line longer than the scanner's token limit would silently
	// produce a truncated dictionary.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("read %s: %w", path, err)
	}
	return dict, nil
}
// isLower reports whether word consists solely of lowercase letters.
//
// Any uppercase letter, digit, symbol, or whitespace makes it return false.
// An empty word has no offending character and therefore returns true.
func isLower(word string) bool {
	for _, ch := range word {
		if unicode.IsLower(ch) {
			continue
		}
		return false
	}
	return true
}