-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEng_to_kana.go
110 lines (94 loc) · 2.96 KB
/
Eng_to_kana.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package kanatrans
import (
"strings"
"encoding/json"
_ "embed"
)
// EngToKana bundles the pronunciation dictionary and the per-stage conversion
// functions used to turn English text into Katakana.
//
// All fields are unexported; NewEngToKana populates them.
type EngToKana struct {
	// db maps a word to its list of pronunciation strings. It is filled by
	// decoding the embedded JSON dictionary.
	db map[string][]string

	// cleanFn receives a raw line plus a callback that converts a cleaned
	// fragment, and returns the final string.
	cleanFn func(string, func(string) string) string

	// vowelFn and consonantFn are each called with the word and a
	// pronunciation string, and return the rewritten pronunciation.
	vowelFn     func(string, string) string
	consonantFn func(string, string) string

	// epentheticFn, moraeFn and kanaFn are applied one after another to the
	// output of the consonant stage.
	epentheticFn func(string) string
	moraeFn      func(string) string
	kanaFn       func(string) string

	// recoverFn is the fallback used for words that are not found in db.
	recoverFn func(string) string
}
// dbFile holds the contents of dict/cmu_ipa.json, embedded into the binary at
// build time by the go:embed directive below. loadDB decodes it as JSON.
//
//go:embed dict/cmu_ipa.json
var dbFile string
// NewEngToKana builds an EngToKana with every conversion stage wired in and
// the embedded pronunciation dictionary loaded.
//
// strictClean is optional: only its first value is consulted, and it defaults
// to false (non-strict cleaning) when omitted. The same flag is handed to both
// the English cleaner and the Romaji-to-Kana fallback converter.
func NewEngToKana(strictClean ...bool) *EngToKana {
	// Absent argument means non-strict.
	strict := len(strictClean) > 0 && strictClean[0]

	// Construct the stage helpers in the order the pipeline uses them.
	cleaner := newEnglishCleaner(strict)
	vowels := newVowelConverter()
	consonants := newConsonantConverter()
	epenthesis := newEpentheticVowelHandler()
	moraeMaker := newMoraeCreator()
	kanaMaker := newMoraeKanaConverter()
	romaji := NewRomajiToKana(strict)

	// Bind each helper's method as the corresponding stage function.
	e2k := &EngToKana{
		cleanFn:      cleaner.Clean,
		vowelFn:      vowels.ConvertVowel,
		consonantFn:  consonants.ConvertConsonant,
		epentheticFn: epenthesis.AddEpentheticVowel,
		moraeFn:      moraeMaker.CreateMorae,
		kanaFn:       kanaMaker.ConvertMorae,
		recoverFn:    romaji.Convert,
	}

	// Load the cmu_ipa English pronunciation dictionary.
	// NOTE(review): the error from loadDB is discarded because this
	// constructor has no error return. If decoding fails, words missing from
	// db are routed to recoverFn by TranscriptWord — confirm this silent
	// fallback is intended.
	_ = e2k.loadDB()

	return e2k
}
// loadDB decodes the embedded dictionary JSON held in dbFile into e2k.db,
// a map[string][]string.
//
// It returns the error reported by json.Unmarshal, or nil on success.
func (e2k *EngToKana) loadDB() error {
	raw := []byte(dbFile)
	return json.Unmarshal(raw, &e2k.db)
}
// TranscriptWord converts a single English word to Katakana.
//
// The word is looked up verbatim in the pronunciation dictionary. When the
// entry lists several pronunciations, only the first one is converted, by
// running it through the vowel, consonant, epenthetic-vowel, morae and kana
// stages in that order.
//
// If the word is missing from the dictionary, or its entry contains no
// pronunciations, the word is handed to the recover function and that result
// is returned instead.
func (e2k *EngToKana) TranscriptWord(word string) string {
	// A missing key yields a nil slice, so one length check covers both
	// "not in dictionary" and "entry present but empty". The latter used to
	// panic when indexing the result.
	phs := e2k.db[word]
	if len(phs) == 0 {
		return e2k.recoverFn(word)
	}

	// Only the first pronunciation is ever returned, so the alternatives are
	// not converted.
	ph := e2k.vowelFn(word, phs[0])
	ph = e2k.consonantFn(word, ph)
	ph = e2k.epentheticFn(ph)
	morae := e2k.moraeFn(ph)
	return e2k.kanaFn(morae)
}
// TranscriptSentence converts an English sentence to Katakana.
//
// The line is passed to the cleaning function together with
// transcriptCleanSentenceFragment as the callback; whatever the cleaning
// function returns is returned unchanged.
func (e2k *EngToKana) TranscriptSentence(line string) string {
	fragmentToKana := e2k.transcriptCleanSentenceFragment
	return e2k.cleanFn(line, fragmentToKana)
}
// transcriptCleanSentenceFragment converts a fragment of English words into
// Kana.
//
// The fragment is split on whitespace, each word is converted with
// TranscriptWord, and the per-word results are concatenated in order with no
// separator between them. An empty or whitespace-only fragment yields "".
func (e2k *EngToKana) transcriptCleanSentenceFragment(line string) string {
	tokens := strings.Fields(line)

	// Convert word by word, keeping the original order.
	converted := make([]string, len(tokens))
	for i, token := range tokens {
		converted[i] = e2k.TranscriptWord(token)
	}

	// Joining is still placeholder logic: the pieces are glued together as-is.
	return strings.Join(converted, "")
}