Skip to content

Commit

Permalink
Fix normalizer and add config for fine-tuning
Browse files Browse the repository at this point in the history
  • Loading branch information
traderpedroso committed Aug 28, 2024
1 parent ca97496 commit 0318871
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 30 deletions.
51 changes: 51 additions & 0 deletions configs/autoreg_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

paths:
checkpoint_dir: checkpoints # Directory to store model checkpoints and tensorboard, will be created if not existing.
data_dir: datasets # Directory to store processed data, will be created if not existing.

preprocessing:
languages: ['de', 'en_us'] # All languages in the dataset.

# Text (grapheme) and phoneme symbols, either provide a string or list of strings.
# Symbols in the dataset will be filtered according to these lists!
text_symbols: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZäöüÄÖÜß'
phoneme_symbols: ['a', 'b', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'æ', 'ç', 'ð', 'ø', 'ŋ', 'œ', 'ɐ', 'ɑ', 'ɔ', 'ə', 'ɛ', 'ɝ', 'ɹ', 'ɡ', 'ɪ', 'ʁ', 'ʃ', 'ʊ', 'ʌ', 'ʏ', 'ʒ', 'ʔ', 'ˈ', 'ˌ', 'ː', '̃', '̍', '̥', '̩', '̯', '͡', 'θ']

char_repeats: 1 # Number of grapheme character repeats to allow for mapping to longer phoneme sequences.
# Set to 1 for autoreg_transformer.
lowercase: true # Whether to lowercase the grapheme input.
n_val: 5000 # Default number of validation data points if no explicit validation data is provided.


model:
type: 'autoreg_transformer' # Whether to use a forward transformer or autoregressive transformer model.
# Choices: ['transformer', 'autoreg_transformer']
d_model: 512
d_fft: 1024
layers: 4
dropout: 0.1
heads: 4

training:

# Hyperparams for learning rate and scheduler.
# The scheduler reduces the lr when the phoneme error rate plateaus (evaluated every generate_steps).

learning_rate: 0.0001 # Learning rate of Adam.
warmup_steps: 10000 # Linear increase of the lr from zero to the given lr within the given number of steps.
scheduler_plateau_factor: 0.5 # Factor to multiply learning rate on plateau.
scheduler_plateau_patience: 10 # Number of text generations with no improvement to tolerate.
batch_size: 32 # Training batch size.
batch_size_val: 32 # Validation batch size.
epochs: 500 # Number of epochs to train.
generate_steps: 10000 # Interval of training steps to generate sample outputs. Also, at this step the phoneme and word
# error rates are calculated for the scheduler.
validate_steps: 10000 # Interval of training steps to validate the model
# (for the autoregressive model this is teacher-forced).
checkpoint_steps: 100000 # Interval of training steps to save the model.
n_generate_samples: 10 # Number of result samples to show on tensorboard.
store_phoneme_dict_in_model: true # Whether to store the raw phoneme dict in the model.
# It will be loaded by the phonemizer object.
ddp_backend: 'nccl' # Backend used by Torch DDP
ddp_host: 'localhost' # Hostname used by Torch DDP
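ddp_post: '12355' # Port used by Torch DDP. NOTE(review): the key is spelled 'ddp_post', not 'ddp_port' - confirm which name the training code reads.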
51 changes: 51 additions & 0 deletions configs/forward_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

paths:
checkpoint_dir: checkpoints # Directory to store model checkpoints and tensorboard, will be created if not existing.
data_dir: datasets # Directory to store processed data, will be created if not existing.

preprocessing:
languages: ['de', 'en_us'] # All languages in the dataset.

# Text (grapheme) and phoneme symbols, either provide a string or list of strings.
# Symbols in the dataset will be filtered according to these lists!
text_symbols: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZäöüÄÖÜß'
phoneme_symbols: ['a', 'b', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'æ', 'ç', 'ð', 'ø', 'ŋ', 'œ', 'ɐ', 'ɑ', 'ɔ', 'ə', 'ɛ', 'ɝ', 'ɹ', 'ɡ', 'ɪ', 'ʁ', 'ʃ', 'ʊ', 'ʌ', 'ʏ', 'ʒ', 'ʔ', 'ˈ', 'ˌ', 'ː', '̃', '̍', '̥', '̩', '̯', '͡', 'θ']

char_repeats: 3 # Number of grapheme character repeats to allow for mapping to longer phoneme sequences.
# Set to 1 for autoreg_transformer.
lowercase: true # Whether to lowercase the grapheme input.
n_val: 5000 # Default number of validation data points if no explicit validation data is provided.


model:
type: 'transformer' # Whether to use a forward transformer or autoregressive transformer model.
# Choices: ['transformer', 'autoreg_transformer']
d_model: 512
d_fft: 1024
layers: 6
dropout: 0.1
heads: 4

training:

# Hyperparams for learning rate and scheduler.
# The scheduler reduces the lr when the phoneme error rate plateaus (evaluated every generate_steps).

learning_rate: 0.0001 # Learning rate of Adam.
warmup_steps: 10000 # Linear increase of the lr from zero to the given lr within the given number of steps.
scheduler_plateau_factor: 0.5 # Factor to multiply learning rate on plateau.
scheduler_plateau_patience: 10 # Number of text generations with no improvement to tolerate.
batch_size: 32 # Training batch size.
batch_size_val: 32 # Validation batch size.
epochs: 500 # Number of epochs to train.
generate_steps: 10000 # Interval of training steps to generate sample outputs. Also, at this step the phoneme and word
# error rates are calculated for the scheduler.
validate_steps: 10000 # Interval of training steps to validate the model
# (for the autoregressive model this is teacher-forced).
checkpoint_steps: 100000 # Interval of training steps to save the model.
n_generate_samples: 10 # Number of result samples to show on tensorboard.
store_phoneme_dict_in_model: true # Whether to store the raw phoneme dict in the model.
# It will be loaded by the phonemizer object.
ddp_backend: 'nccl' # Backend used by Torch DDP
ddp_host: 'localhost' # Hostname used by Torch DDP
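ddp_post: '12355' # Port used by Torch DDP. NOTE(review): the key is spelled 'ddp_post', not 'ddp_port' - confirm which name the training code reads.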
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
DeepPhonemizerBR is compatible with Python 3.6+ and is distributed under the MIT license.
"""
# Version: 0.0.6
# Version: 0.0.7
setup(
name="xphonebr",
version="0.0.6",
version="0.0.7",
author="Emerson Pedroso",
author_email="traderpedroso@icloud.com",
description="Grapheme to phoneme conversion and tools for tts with deep learning.",
Expand Down
47 changes: 19 additions & 28 deletions xphonebr/Util/norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,6 @@
("ghz", "giga-hertz"),
("km", "quilômetro"),
("ltda", "limitada"),
("jan", "janeiro"),
("fev", "fevereiro"),
("mar", "março"),
("abr", "abril"),
("mai", "maio"),
("jun", "junho"),
("jul", "julho"),
("ago", "agosto"),
("set", "setembro"),
("out", "outubro"),
("nov", "novembro"),
("dez", "dezembro"),
("pág", "página"),
("págs", "páginas"),
("s.a", "sociedade anônima"),
Expand Down Expand Up @@ -166,8 +154,12 @@ def money_to_words_integers(match):
return f"{amount_text} {currency_text}"

# Regular expression for amounts expressed in millions and billions
text = re.sub(r"(R\$|€|£|\$) (\d+)( milhões| bilhões)", money_to_words_millions, text)
text = re.sub(r"(R\$|€|£|\$)(\d+)( milhões| bilhões)", money_to_words_millions, text)
text = re.sub(
r"(R\$|€|£|\$) (\d+)( milhões| bilhões)", money_to_words_millions, text
)
text = re.sub(
r"(R\$|€|£|\$)(\d+)( milhões| bilhões)", money_to_words_millions, text
)

# Regular expression for amounts with cents
text = re.sub(r"(R\$|€|£|\$) (\d+),(\d{2})", money_to_words_cents, text)
Expand Down Expand Up @@ -212,18 +204,17 @@ def _normalize_numbers_with_letters(text):
)



def normalizer(text):
text = _normalize_percentages(text)
text = _normalize_time(text)
text = _normalize_money(text)
text = _normalize_am_pm_times(text)
text = _normalize_numbers_with_letters(text)
text = _normalize_numbers(text)
text = _normalize_abbreviations(text)
text = replace_punctuation(text)
text = remove_aux_symbols(text)
text = remove_punctuation_at_begin(text)
text = collapse_whitespace(text)
text = re.sub(r"([^\.,!\?\-…])$", r"\1.", text)
return text
text = _normalize_percentages(text)
text = _normalize_time(text)
text = _normalize_money(text)
text = _normalize_am_pm_times(text)
text = _normalize_numbers_with_letters(text)
text = _normalize_numbers(text)
text = _normalize_abbreviations(text)
text = replace_punctuation(text)
text = remove_aux_symbols(text)
text = remove_punctuation_at_begin(text)
text = collapse_whitespace(text)
text = re.sub(r"([^\.,!\?\-…])$", r"\1.", text)
return text

0 comments on commit 0318871

Please sign in to comment.