Moved most declarations to constructor for efficiency

andreihar · May 11, 2024 · ac4cb22 · ac4cb22
1 parent 0112aaf
commit ac4cb22
Showing 1 changed file with 74 additions and 118 deletions.
diff --git a/taibun/taibun.py b/taibun/taibun.py
@@ -86,12 +86,68 @@ class Converter(object):
 
     def __init__(self, system='Tailo', dialect='south', format='mark', delimiter=DEFAULT_DELIMITER, sandhi=DEFAULT_SANDHI, punctuation='format', convert_non_cjk=False):
         self.system = system.lower()
-        self.dialect = dialect.lower()
         self.format = format
         self.delimiter = delimiter if delimiter != self.DEFAULT_DELIMITER else self.__set_default_delimiter()
         self.sandhi = sandhi if sandhi != self.DEFAULT_SANDHI else self.__set_default_sandhi()
         self.punctuation = punctuation
         self.convert_non_cjk = convert_non_cjk
+        self.__declarations(dialect.lower())
+
+
+    def __declarations(self, dialect):
+        def placements(elements):
+            return elements + [s.capitalize() for s in elements]
+        def converts(dictionary):
+            return {**dictionary, **{k.capitalize(): v.capitalize() for k, v in dictionary.items()}}
+
+        # Conversion
+        self.conversion_func = {
+            'poj': self.__tailo_to_poj,
+            'zhuyin': self.__tailo_to_zhuyin,
+            'tlpa': self.__tailo_to_tlpa,
+            'pingyim': self.__tailo_to_pingyim,
+            'tongiong': self.__tailo_to_ti,
+            'ipa': self.__tailo_to_ipa,
+            'tailo': self.__tailo_to_tailo
+        }.get(self.system, lambda word: word[0])
+
+        if self.system == 'tailo':
+            self.placement = placements(['ia'+self.tt+'u', 'ua'+self.tt+'i', 'ua'+self.tt, 'ue'+self.tt, 'ui'+self.tt, 'a'+self.tt+'i', 'a'+self.tt+'u', 'o'+self.tt+'o','ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'o'+self.tt+'o', 'a'+self.tt, 'o'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt])
+            self.tones = ["", "", "́", "̀", "", "̂", "̌", "̄", "̍", "̋"]
+        if self.system == 'poj':
+            self.convert = converts({'nng':'nng', 'nnh':'hⁿ', 'nn':'ⁿ', 'ts':'ch', 'ing':'eng', 'uai':'oai', 'uan':'oan', 'ik':'ek', 'ua':'oa', 'ue':'oe', 'oo':'o͘'})
+            self.placement = placements(['oa'+self.tt+'h', 'oa'+self.tt+'n', 'oa'+self.tt+'ng', 'oa'+self.tt+'ⁿ', 'oa'+self.tt+'t', 'ia'+self.tt+'u', 'oe'+self.tt+'h', 'o'+self.tt+'e', 'oa'+self.tt+'i', 'u'+self.tt+'i', 'o'+self.tt+'a', 'a'+self.tt+'i', 'a'+self.tt+'u', 'ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'a'+self.tt, 'o'+self.tt, 'o͘'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt])
+            self.tones = ['', '', '́', '̀', '', '̂', '', '̄', '̍', '']
+        if self.system == 'zhuyin':
+            self.convert = {'p4':'ㆴ4', 'p8':'ㆴ8', 'k4':'ㆶ4', 'k8':'ㆶ8', 't4':'ㆵ4', 't8':'ㆵ8', 'h4':'ㆷ4', 'h8':'ㆷ8', 'h0': '0','tshing':'ㄑㄧㄥ', 'tshinn':'ㄑㆪ', 'phing':'ㄆㄧㄥ', 'phinn':'ㄆㆪ', 'tsing':'ㄐㄧㄥ', 'tsinn':'ㄐㆪ','ainn':'ㆮ', 'aunn':'ㆯ', 'giok':'ㆣㄧㄜㆶ', 'ngai':'ㄫㄞ', 'ngau':'ㄫㄠ', 'ngoo':'ㄫㆦ', 'ping':'ㄅㄧㄥ','pinn':'ㄅㆪ', 'senn':'ㄙㆥ', 'sing':'ㄒㄧㄥ', 'sinn':'ㄒㆪ', 'tshi':'ㄑㄧ','ang':'ㄤ', 'ann':'ㆩ', 'enn':'ㆥ', 'ing':'ㄧㄥ', 'inn':'ㆪ', 'mai':'ㄇㄞ', 'mau':'ㄇㄠ', 'mng':'ㄇㆭ','moo':'ㄇㆦ', 'mua':'ㄇㄨㄚ', 'mue':'ㄇㄨㆤ', 'mui':'ㄇㄨㄧ', 'nga':'ㄫㄚ', 'nge':'ㄫㆤ', 'ngi':'ㄫㄧ','ong':'ㆲ', 'onn':'ㆧ', 'tsh':'ㄘ', 'tsi':'ㄐㄧ', 'unn':'ㆫ','ai':'ㄞ', 'am':'ㆰ', 'an':'ㄢ', 'au':'ㄠ', 'ji':'ㆢㄧ', 'kh':'ㄎ', 'ma':'ㄇㄚ', 'me':'ㄇㆤ', 'mi':'ㄇㄧ','ng':'ㆭ', 'ok':'ㆦㆶ', 'om':'ㆱ', 'oo':'ㆦ', 'ph':'ㄆ', 'si':'ㄒㄧ', 'th':'ㄊ', 'ts':'ㄗ','a':'ㄚ', 'b':'ㆠ', 'e':'ㆤ', 'g':'ㆣ', 'h':'ㄏ', 'i':'ㄧ', 'j':'ㆡ', 'k':'ㄍ', 'l':'ㄌ', 'm':'ㆬ','n':'ㄋ', 'o':'ㄜ', 'p':'ㄅ', 's':'ㄙ', 't':'ㄉ', 'u':'ㄨ'}
+            self.tones = ['', '', 'ˋ', '˪', '', 'ˊ', '', '˫', '˙']
+        if self.system == 'tlpa':
+            self.convert = converts({'tsh':'ch', 'ts':'c'})
+        if self.system == 'pingyim':
+            self.convert = converts({'p4':'p4', 't4':'t4', 'k4':'k4', 'h4':'h4', 'p8':'p8', 't8':'t8', 'k8':'k8', 'h8':'h8','ainn':'nai', 'iunn':'niu', 'ann':'na', 'onn':'noo', 'enn':'ne','inn':'ni', 'unn':'nu', 'au':'ao', 'ph':'p', 'nng':'lng', 'tsh':'c','ng':'ggn', 'ts':'z', 'th':'t', 'kh':'k', 'ir':'i', 'p':'b', 'b':'bb','t':'d', 'k':'g', 'g':'gg', 'j':'zz', 'n':'ln', 'm':'bbn'})
+            self.placement = placements(['ua'+self.tt+'i', 'ia'+self.tt+'o', 'a'+self.tt+'i', 'a'+self.tt+'o', 'oo'+self.tt, 'ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'ua'+self.tt, 'ue'+self.tt, 'ui'+self.tt,'a'+self.tt, 'o'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt, 'n'+self.tt])
+            self.tones = ['', '̄', '̌', '̀', '̄', '́', '', '̂', '́', '']
+        if self.system == 'tongiong':
+            self.convert = converts({'p4':'p4', 't4':'t4', 'k4':'k4', 'h4':'h4', 'p8':'p8', 't8':'t8', 'k8':'k8', 'h8':'h8','oo':'o', 'om':'om', 'ong':'ong', 'ir':'i', 'tsh':'c','ts':'z', 'nng':'nng', 'ng':'ng', 'g':'gh', 'kh':'k', 'k':'g','ph':'p', 'p':'b', 'b':'bh', 'th':'t', 't':'d', 'j':'r'})
+            self.placement = placements(['ua'+self.tt+'i', 'ia'+self.tt+'o', 'a'+self.tt+'i', 'a'+self.tt+'o', 'oo'+self.tt, 'ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'ua'+self.tt, 'ue'+self.tt, 'ui'+self.tt,'a'+self.tt, 'o'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt])
+            self.tones = ["̊", "", "̀", "̂", "̄", "̆", "", "̄", "", "́"]
+        if self.system == 'ipa':
+            convert = {'tsing':'tɕiɪŋ','jiang':'dʑiaŋ','tshing':'tɕʰiɪŋ','tsik':'tɕiɪk','tshik':'tɕʰiɪk','jian':'dʑiɛn','jiat':'dʑiɛt','tshi':'tɕʰi','iann':'iã','ainn':'ãi','iang':'iaŋ','nng':'nŋ','mia':'miã','mui':'muĩ','mue':'muẽ','mua':'muã','ma':'mã','me':'mẽ','mi':'mĩ','moo':'mɔ̃','nia':'niã','nua':'nuã','na':'nã','ne':'nẽ','ni':'nĩ','noo':'nɔ̃','ngia':'ŋiã','ngiu':'ŋiũ','nga':'ŋã','nge':'ŋẽ','ngi':'ŋĩ','ngoo':'ŋɔ̃','ing':'iɪŋ','tsh':'tsʰ','tsi':'tɕi','ian':'iɛn','iat':'iɛt','onn':'ɔ̃','ong':'ɔŋ','ik':'iɪk','ji':'dʑi','kh':'kʰ','ng':'ŋ','oo':'ɔ','nn':'̃','hm':'hm̩','ph':'pʰ','th':'tʰ','ok':'ɔk','om':'ɔm','j':'dz','o':'ə'}
+            if dialect == 'north':
+                convert.update({'o':'o'})
+            self.convert = converts(convert)
+            self.convert2 = converts({'p4':'p̚4','p8':'p̚8','k4':'k̚4','k8':'k̚8','t4':'t̚4','t8':'t̚8','h4':'ʔ4','h8':'ʔ8','si':'ɕi','h0':'0'})
+            self.tones = ['', '⁴⁴', '⁵³', '¹¹', '²¹', '²⁵', '', '²²', '⁵'] if dialect != 'north' else ['', '⁵⁵', '⁵¹', '²¹', '³²', '²⁴', '', '³³', '⁴']
+
+        # Dialect
+        self.word_dict = {k: (v.split('/')[1] if dialect == 'north' else v.split('/')[0]) if '/' in v else v for k, v in word_dict.items()}
+
+        # Sandhi
+        if self.sandhi in ['auto', 'exc_last', 'incl_last']:
+            self.sandhi_conversion = {'1':'7', '7':'3', '3':'2', '2':'1', '5':'7', 'p4':'p8', 't4':'t8', 'k4':'k8', 'h4':'2', 'p8':'p4', 't8':'t4', 'k8':'k4', 'h8':'3'}
+            if dialect == 'north':
+                self.sandhi_conversion.update({'5':'3'})
+            self.a_sandhi = {'1':'7', '2':'1', '3':'1', '5':'7', 'p4':'p8', 't4':'t8', 'k4':'k8', 'h4':'1', 'p8':'p4', 't8':'t4', 'k8':'k4', 'h8':'7'}
 
 
     ### Interface functions
@@ -109,14 +165,11 @@ def get(self, input):
 
     # Helper to convert separate words
     def __convert_tokenised(self, word):
-        if word[0] in word_dict:
-            word = (word_dict[word[0]],) + word[1:]
-            if "/" in word[0]:
-                dialect_part = word[0].split("/")[1] if self.dialect == 'north' else word[0].split("/")[0]
-                word = (dialect_part,) + word[1:]
+        if word[0] in self.word_dict:
+            word = (self.word_dict[word[0]],) + word[1:]
         elif not self.convert_non_cjk or word[0] in ".,!?\"#$%&()*+/:;<=>@[\\]^`{|}~\t。．，、！？；：（）［］【】「」“”":
             return word[0]
-        word = self.__system_conversion(word).replace('---', '--')
+        word = self.conversion_func(word).replace('---', '--')
         if self.format == 'number' and self.system in ['tailo', 'poj']:
             word = self.__mark_to_number(word)
         if self.format == 'strip':
@@ -130,18 +183,6 @@ def __convert_tokenised(self, word):
         return word.replace('--', self.suffix_token).replace('-', self.delimiter).replace(self.suffix_token, '--')
 
 
-    # Helper switch for converting 漢字 based on defined transliteration system
-    def __system_conversion(self, word):
-        if self.system == 'poj': return self.__tailo_to_poj(word)
-        if self.system == 'zhuyin': return self.__tailo_to_zhuyin(word)
-        if self.system == 'tlpa': return self.__tailo_to_tlpa(word)
-        if self.system == 'pingyim': return self.__tailo_to_pingyim(word)
-        if self.system == 'tongiong': return self.__tailo_to_ti(word)
-        if self.system == 'ipa': return self.__tailo_to_ipa(word)
-        if self.sandhi in ['auto', 'exc_last', 'incl_last']: return self.__tailo_to_tailo(word)
-        else: return word[0]
-
-
     # Helper functions to set delimiter according to transliteration system if it wasn't explicitly defined by the user
     def __set_default_delimiter(self):
         if self.system == 'tlpa' or self.system == 'zhuyin' or self.system == 'ipa': return ' '
@@ -219,18 +260,14 @@ def __get_mark_tone(self, input, placement, tones):
 
     # Helper to apply tone sandhi to a word
     def __tone_sandhi(self, words, last):
-        sandhi = {'1':'7', '7':'3', '3':'2', '2':'1', '5':'7', 'p4':'p8', 't4':'t8', 'k4':'k8', 'h4':'2', 'p8':'p4', 't8':'t4', 'k8':'k4', 'h8':'3'}
-        a_sandhi = {'1':'7', '2':'1', '3':'1', '5':'7', 'p4':'p8', 't4':'t8', 'k4':'k8', 'h4':'1', 'p8':'p4', 't8':'t4', 'k8':'k4', 'h8':'7'}
-        if self.dialect == 'north':
-            sandhi.update({'5':'3'})
         indices = (
             list(range(len(words) - 2)) if last == 'a suff' and len(words) > 1 
             else list(range(len(words) - 1)) if not last 
             else list(range(len(words)))
         )
-        sandhi_words = [self.__replacement_tool(sandhi, words[i]) for i in indices]
+        sandhi_words = [self.__replacement_tool(self.sandhi_conversion, words[i]) for i in indices]
         if last == 'a suff' and len(words) > 1:
-            sandhi_words.append(self.__replacement_tool(a_sandhi, words[-2]))
+            sandhi_words.append(self.__replacement_tool(self.a_sandhi, words[-2]))
         if not last or last == 'a suff':
             sandhi_words.append(words[-1])
         return sandhi_words
@@ -264,93 +301,45 @@ def __tone_sandhi_position(self, input):
     ### Tai-lo to other transliteration systems converting
 
     # Helper to convert syllable from Tai-lo to Tai-lo
-    # (called only in cases when tone sandhi is applied)
     def __tailo_to_tailo(self, input):
-        placement = [
-            'ia'+self.tt+'u', 'ua'+self.tt+'i', 'ua'+self.tt, 'ue'+self.tt, 'ui'+self.tt, 'a'+self.tt+'i',
-            'a'+self.tt+'u', 'o'+self.tt+'o','ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'o'+self.tt+'o', 'a'+self.tt, 
-            'o'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt
-        ]
-        tones = ["", "", "́", "̀", "", "̂", "̌", "̄", "̍", "̋"]
-        placement += [s.capitalize() for s in placement]
-        input = '-'.join(self.__get_mark_tone(nt, placement, tones) for nt in self.__get_number_tones(input))
+        input = '-'.join(self.__get_mark_tone(nt, self.placement, self.tones) for nt in self.__get_number_tones(input))
         return input.replace(self.suffix_token, '--')
 
 
     # Helper to convert syllable from Tai-lo to POJ
     def __tailo_to_poj(self, input):
-        placement = [
-            'oa'+self.tt+'h', 'oa'+self.tt+'n', 'oa'+self.tt+'ng', 'oa'+self.tt+'ⁿ', 'oa'+self.tt+'t',
-            'ia'+self.tt+'u', 'oe'+self.tt+'h', 'o'+self.tt+'e', 'oa'+self.tt+'i', 'u'+self.tt+'i', 'o'+self.tt+'a',
-            'a'+self.tt+'i', 'a'+self.tt+'u', 'ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'a'+self.tt,
-            'o'+self.tt, 'o͘'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt
-        ]
-        convert = {'nng':'nng', 'nnh':'hⁿ', 'nn':'ⁿ', 'ts':'ch', 'ing':'eng', 'uai':'oai', 'uan':'oan', 'ik':'ek', 'ua':'oa', 'ue':'oe', 'oo':'o͘'}
-        tones = ['', '', '́', '̀', '', '̂', '', '̄', '̍', '']
-        placement += [s.capitalize() for s in placement]
-        convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
         number_tones = self.__get_number_tones(input)
         input = '-'.join(
-            self.__get_mark_tone(self.__replacement_tool(convert, nt), placement, tones) 
+            self.__get_mark_tone(self.__replacement_tool(self.convert, nt), self.placement, self.tones) 
             for nt in number_tones
         )
         return input.replace(self.suffix_token, '--')
 
 
     # Helper to convert syllable from Tai-lo to 方音符號 (zhuyin)
     def __tailo_to_zhuyin(self, input):
-        convert = {
-            'p4':'ㆴ4', 'p8':'ㆴ8', 'k4':'ㆶ4', 'k8':'ㆶ8', 't4':'ㆵ4', 't8':'ㆵ8', 'h4':'ㆷ4', 'h8':'ㆷ8', 'h0': '0',
-            'tshing':'ㄑㄧㄥ', 'tshinn':'ㄑㆪ', 'phing':'ㄆㄧㄥ', 'phinn':'ㄆㆪ', 'tsing':'ㄐㄧㄥ', 'tsinn':'ㄐㆪ',
-            'ainn':'ㆮ', 'aunn':'ㆯ', 'giok':'ㆣㄧㄜㆶ', 'ngai':'ㄫㄞ', 'ngau':'ㄫㄠ', 'ngoo':'ㄫㆦ', 'ping':'ㄅㄧㄥ',
-            'pinn':'ㄅㆪ', 'senn':'ㄙㆥ', 'sing':'ㄒㄧㄥ', 'sinn':'ㄒㆪ', 'tshi':'ㄑㄧ',
-            'ang':'ㄤ', 'ann':'ㆩ', 'enn':'ㆥ', 'ing':'ㄧㄥ', 'inn':'ㆪ', 'mai':'ㄇㄞ', 'mau':'ㄇㄠ', 'mng':'ㄇㆭ',
-            'moo':'ㄇㆦ', 'mua':'ㄇㄨㄚ', 'mue':'ㄇㄨㆤ', 'mui':'ㄇㄨㄧ', 'nga':'ㄫㄚ', 'nge':'ㄫㆤ', 'ngi':'ㄫㄧ',
-            'ong':'ㆲ', 'onn':'ㆧ', 'tsh':'ㄘ', 'tsi':'ㄐㄧ', 'unn':'ㆫ',
-            'ai':'ㄞ', 'am':'ㆰ', 'an':'ㄢ', 'au':'ㄠ', 'ji':'ㆢㄧ', 'kh':'ㄎ', 'ma':'ㄇㄚ', 'me':'ㄇㆤ', 'mi':'ㄇㄧ',
-            'ng':'ㆭ', 'ok':'ㆦㆶ', 'om':'ㆱ', 'oo':'ㆦ', 'ph':'ㄆ', 'si':'ㄒㄧ', 'th':'ㄊ', 'ts':'ㄗ',
-            'a':'ㄚ', 'b':'ㆠ', 'e':'ㆤ', 'g':'ㆣ', 'h':'ㄏ', 'i':'ㄧ', 'j':'ㆡ', 'k':'ㄍ', 'l':'ㄌ', 'm':'ㆬ',
-            'n':'ㄋ', 'o':'ㄜ', 'p':'ㄅ', 's':'ㄙ', 't':'ㄉ', 'u':'ㄨ'}
-        tones = ['', '', 'ˋ', '˪', '', 'ˊ', '', '˫', '˙']
         output = []
         for nt in self.__get_number_tones((input[0].lower(), input[1])):
-            nt = self.__replacement_tool(convert, nt).replace(self.suffix_token, '')
+            nt = self.__replacement_tool(self.convert, nt).replace(self.suffix_token, '')
             if len(nt) > 2 and nt[-2] == 'ㄋ':
                 nt = nt[:-2] + 'ㄣ' + nt[-1]
             if self.format != 'number':
-                nt = ''.join(tones[int(t)] if t.isnumeric() else t for t in nt)
+                nt = ''.join(self.tones[int(t)] if t.isnumeric() else t for t in nt)
             output.append(nt)
         return '-'.join(output).replace(self.suffix_token, '')
 
 
     # Helper to convert syllable from Tai-lo to TLPA
     def __tailo_to_tlpa(self, input):
-        convert = {'tsh':'ch', 'ts':'c'}
-        convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
-        input = '-'.join(self.__replacement_tool(convert, nt) for nt in self.__get_number_tones(input))
+        input = '-'.join(self.__replacement_tool(self.convert, nt) for nt in self.__get_number_tones(input))
         return input.replace(self.suffix_token, '')
 
 
     # Helper to convert syllable from Tai-lo to Bbanlam pingyim
     def __tailo_to_pingyim(self, input):
-        placement = [
-            'ua'+self.tt+'i', 'ia'+self.tt+'o', 'a'+self.tt+'i', 'a'+self.tt+'o', 
-            'oo'+self.tt, 'ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'ua'+self.tt, 'ue'+self.tt, 'ui'+self.tt,
-            'a'+self.tt, 'o'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt, 'n'+self.tt
-        ]
-        # plosives don't change, ptkh 4/8 -> ptkh 4/8
-        convert = {
-            'p4':'p4', 't4':'t4', 'k4':'k4', 'h4':'h4', 'p8':'p8', 't8':'t8', 'k8':'k8', 'h8':'h8',
-            'ainn':'nai', 'iunn':'niu', 'ann':'na', 'onn':'noo', 'enn':'ne',
-            'inn':'ni', 'unn':'nu', 'au':'ao', 'ph':'p', 'nng':'lng', 'tsh':'c',
-            'ng':'ggn', 'ts':'z', 'th':'t', 'kh':'k', 'ir':'i', 'p':'b', 'b':'bb',
-            't':'d', 'k':'g', 'g':'gg', 'j':'zz', 'n':'ln', 'm':'bbn'}
-        tones = ['', '̄', '̌', '̀', '̄', '́', '', '̂', '́', '']
-        placement += [s.capitalize() for s in placement]
-        convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
         output = []
         for nt in self.__get_number_tones(input):
-            replaced = self.__replacement_tool(convert, nt)
+            replaced = self.__replacement_tool(self.convert, nt)
             if replaced[0] in ['i', 'I']: # Initial i
                 replaced = ('Y' if replaced[0] == 'I' else 'y') + (replaced[1:] if replaced[1] in ['a', 'u', 'o'] else replaced.lower())
             if replaced[0] in ['u', 'U']: # Initial u
@@ -367,61 +356,29 @@ def __tailo_to_pingyim(self, input):
             if replaced[-3:-1] == 'ln': # Final n
                 replaced = replaced[:-3] + 'n' + replaced[-1]
             if self.format != 'number':
-                output.append(self.__get_mark_tone(replaced, placement, tones))
+                output.append(self.__get_mark_tone(replaced, self.placement, self.tones))
             else:
                 output.append(replaced)
         return '-'.join(output).replace(self.suffix_token, '')
 
 
     # Helper to convert syllable from Tai-lo to Tong-iong ping-im
-    #       Not enough information on tone mark placement
     def __tailo_to_ti(self, input):
-        placement = [
-            'ua'+self.tt+'i', 'ia'+self.tt+'o', 'a'+self.tt+'i', 'a'+self.tt+'o', 
-            'oo'+self.tt, 'ia'+self.tt, 'iu'+self.tt, 'io'+self.tt, 'ua'+self.tt, 'ue'+self.tt, 'ui'+self.tt,
-            'a'+self.tt, 'o'+self.tt, 'e'+self.tt, 'i'+self.tt, 'u'+self.tt, 'n'+self.tt+'g', 'm'+self.tt
-        ]
-        # plosives don't change, ptkh 4/8 -> ptkh 4/8
-        convert = {
-            'p4':'p4', 't4':'t4', 'k4':'k4', 'h4':'h4', 'p8':'p8', 't8':'t8', 'k8':'k8', 'h8':'h8',
-            'oo':'o', 'om':'om', 'ong':'ong', 'ir':'i', 'tsh':'c',
-            'ts':'z', 'nng':'nng', 'ng':'ng', 'g':'gh', 'kh':'k', 'k':'g',
-            'ph':'p', 'p':'b', 'b':'bh', 'th':'t', 't':'d', 'j':'r'}
-        tones = ["̊", "", "̀", "̂", "̄", "̆", "", "̄", "", "́"]
-        placement += [s.capitalize() for s in placement]
-        convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
         number_tones = [nt[:-2] + 'or' + nt[-1] if nt[-2] == 'o' else nt for nt in self.__get_number_tones(input)]
         input = '-'.join(
-            self.__get_mark_tone(self.__replacement_tool(convert, nt), placement, tones) 
+            self.__get_mark_tone(self.__replacement_tool(self.convert, nt), self.placement, self.tones) 
             if self.format != 'number' 
-            else self.__replacement_tool(convert, nt) 
+            else self.__replacement_tool(self.convert, nt) 
             for nt in number_tones
         )
         return input.replace(self.suffix_token, '--')
 
 
     # Helper to convert syllable from Tai-lo to International Phonetic Alphabet
     def __tailo_to_ipa(self, input):
-        convert = {
-            'tsing':'tɕiɪŋ','jiang':'dʑiaŋ','tshing':'tɕʰiɪŋ','tsik':'tɕiɪk','tshik':'tɕʰiɪk',
-            'jian':'dʑiɛn','jiat':'dʑiɛt','tshi':'tɕʰi',
-            'iann':'iã','ainn':'ãi','iang':'iaŋ','nng':'nŋ',
-            'mia':'miã','mui':'muĩ','mue':'muẽ','mua':'muã','ma':'mã','me':'mẽ','mi':'mĩ','moo':'mɔ̃', # m nasalisation
-            'nia':'niã','nua':'nuã','na':'nã','ne':'nẽ','ni':'nĩ','noo':'nɔ̃', # n nasalisation
-            'ngia':'ŋiã','ngiu':'ŋiũ','nga':'ŋã','nge':'ŋẽ','ngi':'ŋĩ','ngoo':'ŋɔ̃', # ng nasalisation
-            'ing':'iɪŋ','tsh':'tsʰ','tsi':'tɕi','ian':'iɛn','iat':'iɛt','onn':'ɔ̃',
-            'ong':'ɔŋ','ik':'iɪk','ji':'dʑi','kh':'kʰ','ng':'ŋ','oo':'ɔ','nn':'̃',
-            'hm':'hm̩','ph':'pʰ','th':'tʰ','ok':'ɔk','om':'ɔm','j':'dz','o':'ə'}
-        if self.dialect == 'north':
-            convert.update({'o':'o'})
-        convert2 = {
-            'p4':'p̚4','p8':'p̚8','k4':'k̚4','k8':'k̚8','t4':'t̚4','t8':'t̚8','h4':'ʔ4','h8':'ʔ8','si':'ɕi','h0':'0'}
-        tones = ['', '⁴⁴', '⁵³', '¹¹', '²¹', '²⁵', '', '²²', '⁵'] if self.dialect != 'north' else ['', '⁵⁵', '⁵¹', '²¹', '³²', '²⁴', '', '³³', '⁴']
-        convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
-        convert2.update({k.capitalize(): v.capitalize() for k, v in convert2.items()})
         output = []
         for nt in self.__get_number_tones((input[0], input[1])):
-            nt = self.__replacement_tool(convert, nt).replace(self.suffix_token, '')
+            nt = self.__replacement_tool(self.convert, nt).replace(self.suffix_token, '')
             if 'ŋ' in nt:
                 if len(nt) > 2:
                     if all(c.lower() not in 'aeioɔu' for c in nt[:nt.index('ŋ')]) and nt.index('ŋ') != 0:
@@ -430,9 +387,9 @@ def __tailo_to_ipa(self, input):
                     nt = nt.replace('ŋ', 'ŋ̍')
             if len(nt) == 2 and nt[0] == 'm':
                 nt = 'm̩' + nt[-1]
-            nt = self.__replacement_tool(convert2, nt)
+            nt = self.__replacement_tool(self.convert2, nt)
             if self.format != 'number':
-                nt = ''.join(tones[int(t)] if t.isnumeric() else t for t in nt)
+                nt = ''.join(self.tones[int(t)] if t.isnumeric() else t for t in nt)
             output.append(unicodedata.normalize('NFC', nt))
         return '-'.join(output).replace(self.suffix_token, '')
 
@@ -467,7 +424,6 @@ def __format_punctuation_cjk(self, input):
 
     # Helper to capitalise text according to punctuation
     def __format_text(self, input):
-        # punc_filter = re.compile("([.!?]\s*)")
         punc_filter = re.compile(r"([.!?]\s*)")
         split_with_punc = punc_filter.split(input)
         split_with_punc = [i[0].upper() + i[1:] if len(i) > 1 else i for i in split_with_punc]