-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranslitbg.go
100 lines (85 loc) · 2.09 KB
/
translitbg.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package translitbg
import (
"bytes"
"fmt"
"io"
"regexp"
)
// TranslitBG bundles the lookup tables and the matcher that Encode consults
// while transliterating. Use New to obtain a ready-to-use value.
type TranslitBG struct {
	// chars is keyed by a single source character, tokens by a two-character
	// sequence; both yield the replacement text Encode writes to its output.
	chars, tokens map[string]string
	// combos holds, per rune, an alternative replacement that Encode prefers
	// over the chars entry depending on the neighbouring runes.
	combos map[rune]string
	// regex is applied by Encode to the rune that follows a tokens match.
	regex *regexp.Regexp
}
// isBGChar reports whether r is a Cyrillic letter handled by this package:
// anything in the contiguous block U+0410 (А) through U+044F (я), plus the
// accented letters U+040D (Ѝ) and U+045D (ѝ).
// See https://symbl.cc/en/alphabets/bulgarian
func isBGChar(r rune) bool {
	switch r {
	case '\u040D', '\u045D':
		return true
	}
	return '\u0410' <= r && r <= '\u044F'
}
// isUpperBGChar reports whether r is an uppercase Cyrillic letter handled by
// this package: U+0410 (А) through U+042F (Я), or the accented U+040D (Ѝ).
func isUpperBGChar(r rune) bool {
	if r == '\u040D' {
		return true
	}
	return r >= '\u0410' && r <= '\u042F'
}
// New returns a TranslitBG wired to the package's STREAMLINED,
// STREAMLINED_TOKENS and STREAMLINED_CYR2COMBO_UC tables, together with a
// matcher for runs of ASCII word characters (`\w` is ASCII-only in Go's
// regexp syntax).
//
// Fields are set by name so that reordering the struct cannot silently swap
// the two map[string]string tables. The pattern is a constant, so
// regexp.MustCompile is used instead of a hand-written compile-and-panic.
func New() *TranslitBG {
	return &TranslitBG{
		chars:  STREAMLINED,
		tokens: STREAMLINED_TOKENS,
		combos: STREAMLINED_CYR2COMBO_UC,
		regex:  regexp.MustCompile(`^\w+$`),
	}
}
// Encode transliterates the Bulgarian text in input to its Latin equivalent.
//
// Runes for which isBGChar is false are copied to the output unchanged, and
// so are Cyrillic runes that have no entry in tr.chars.
//
// A pair of runes listed in tr.tokens is replaced by the token's value only
// when the pair ends a word, i.e. it is followed by the end of input or by a
// rune that is neither in the isBGChar range nor matched by tr.regex. In any
// other position each rune of the pair is looked up in tr.chars on its own.
// (Presumably this implements the word-final "ия" -> "ia" rule of the
// Streamlined System; confirm against STREAMLINED_TOKENS.)
//
// For a rune that also has an entry in tr.combos, the combos value replaces
// the tr.chars value when the following rune is not a lowercase Cyrillic rune
// (end of input included) or when the preceding rune has a non-empty
// tr.chars entry.
//
// The returned error is non-nil only if reading input fails with something
// other than io.EOF; the returned string is empty in that case.
func (tr *TranslitBG) Encode(input string) (string, error) {
	// Decode the whole input first so that the one- and two-rune look-ahead
	// below is plain indexing rather than ReadRune/UnreadRune pairs, which
	// can only step back a single rune.
	source := bytes.NewBufferString(input)
	runes := make([]rune, 0, len(input))
	for {
		r, _, err := source.ReadRune()
		if err == io.EOF {
			break
		}
		if err != nil {
			return "", fmt.Errorf("error reading source text: %w", err)
		}
		runes = append(runes, r)
	}

	var dest bytes.Buffer
	prev := "" // most recently consumed rune, kept as a tr.chars key
	for i := 0; i < len(runes); i++ {
		ch := runes[i]
		if !isBGChar(ch) {
			dest.WriteRune(ch)
			prev = string(ch)
			continue
		}

		// next stays 0 when ch is the last rune; 0 is outside the isBGChar
		// range, so it counts as "not Cyrillic" in the checks below.
		var next rune
		if i+1 < len(runes) {
			next = runes[i+1]
			if found, ok := tr.tokens[string([]rune{ch, next})]; ok {
				wordEnd := i+2 >= len(runes)
				if !wordEnd {
					after := runes[i+2]
					// tr.regex only knows ASCII word characters, so a
					// following Cyrillic letter has to be checked separately.
					wordEnd = !isBGChar(after) && !tr.regex.MatchString(string(after))
				}
				if wordEnd {
					dest.WriteString(found)
					prev = string(next)
					i++ // next was consumed as part of the token
					continue
				}
			}
		}

		latin, ok := tr.chars[string(ch)]
		if !ok {
			// No mapping for this rune: keep it rather than dropping it.
			dest.WriteRune(ch)
			prev = string(ch)
			continue
		}

		upper, hasUpper := tr.combos[ch]
		if hasUpper && (!isBGChar(next) || isUpperBGChar(next) || len(tr.chars[prev]) > 0) {
			dest.WriteString(upper)
		} else {
			dest.WriteString(latin)
		}
		prev = string(ch)
	}
	return dest.String(), nil
}