-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix normalizer and add config for finetunne
- Loading branch information
1 parent
ca97496
commit 0318871
Showing
4 changed files
with
123 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
|
||
paths: | ||
checkpoint_dir: checkpoints # Directory to store model checkpoints and tensorboard, will be created if not existing. | ||
data_dir: datasets # Directory to store processed data, will be created if not existing. | ||
|
||
preprocessing: | ||
languages: ['de', 'en_us'] # All languages in the dataset. | ||
|
||
# Text (grapheme) and phoneme symbols, either provide a string or list of strings. | ||
# Symbols in the dataset will be filtered according to these lists! | ||
text_symbols: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZäöüÄÖÜß' | ||
phoneme_symbols: ['a', 'b', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'æ', 'ç', 'ð', 'ø', 'ŋ', 'œ', 'ɐ', 'ɑ', 'ɔ', 'ə', 'ɛ', 'ɝ', 'ɹ', 'ɡ', 'ɪ', 'ʁ', 'ʃ', 'ʊ', 'ʌ', 'ʏ', 'ʒ', 'ʔ', 'ˈ', 'ˌ', 'ː', '̃', '̍', '̥', '̩', '̯', '͡', 'θ'] | ||
|
||
char_repeats: 1 # Number of grapheme character repeats to allow for mapping to longer phoneme sequences. | ||
# Set to 1 for autoreg_transformer. | ||
lowercase: true # Whether to lowercase the grapheme input. | ||
n_val: 5000 # Default number of validation data points if no explicit validation data is provided. | ||
|
||
|
||
model: | ||
type: 'autoreg_transformer' # Whether to use a forward transformer or autoregressive transformer model. | ||
# Choices: ['transformer', 'autoreg_transformer'] | ||
d_model: 512 | ||
d_fft: 1024 | ||
layers: 4 | ||
dropout: 0.1 | ||
heads: 4 | ||
|
||
training: | ||
|
||
# Hyperparams for learning rate and scheduler. | ||
# The scheduler is reducing the lr on plateau of phoneme error rate (tested every n_generate_steps). | ||
|
||
learning_rate: 0.0001 # Learning rate of Adam. | ||
warmup_steps: 10000 # Linear increase of the lr from zero to the given lr within the given number of steps. | ||
scheduler_plateau_factor: 0.5 # Factor to multiply learning rate on plateau. | ||
scheduler_plateau_patience: 10 # Number of text generations with no improvement to tolerate. | ||
batch_size: 32 # Training batch size. | ||
batch_size_val: 32 # Validation batch size. | ||
epochs: 500 # Number of epochs to train. | ||
generate_steps: 10000 # Interval of training steps to generate sample outputs. Also, at this step the phoneme and word | ||
# error rates are calculated for the scheduler. | ||
validate_steps: 10000 # Interval of training steps to validate the model | ||
# (for the autoregressive model this is teacher-forced). | ||
checkpoint_steps: 100000 # Interval of training steps to save the model. | ||
n_generate_samples: 10 # Number of result samples to show on tensorboard. | ||
store_phoneme_dict_in_model: true # Whether to store the raw phoneme dict in the model. | ||
# It will be loaded by the phonemizer object. | ||
ddp_backend: 'nccl' # Backend used by Torch DDP | ||
ddp_host: 'localhost' # Hostname used by Torch DDP | ||
ddp_post: '12355' # Port used by Torch DDP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
|
||
paths: | ||
checkpoint_dir: checkpoints # Directory to store model checkpoints and tensorboard, will be created if not existing. | ||
data_dir: datasets # Directory to store processed data, will be created if not existing. | ||
|
||
preprocessing: | ||
languages: ['de', 'en_us'] # All languages in the dataset. | ||
|
||
# Text (grapheme) and phoneme symbols, either provide a string or list of strings. | ||
# Symbols in the dataset will be filtered according to these lists! | ||
text_symbols: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZäöüÄÖÜß' | ||
phoneme_symbols: ['a', 'b', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'æ', 'ç', 'ð', 'ø', 'ŋ', 'œ', 'ɐ', 'ɑ', 'ɔ', 'ə', 'ɛ', 'ɝ', 'ɹ', 'ɡ', 'ɪ', 'ʁ', 'ʃ', 'ʊ', 'ʌ', 'ʏ', 'ʒ', 'ʔ', 'ˈ', 'ˌ', 'ː', '̃', '̍', '̥', '̩', '̯', '͡', 'θ'] | ||
|
||
char_repeats: 3 # Number of grapheme character repeats to allow for mapping to longer phoneme sequences. | ||
# Set to 1 for autoreg_transformer. | ||
lowercase: true # Whether to lowercase the grapheme input. | ||
n_val: 5000 # Default number of validation data points if no explicit validation data is provided. | ||
|
||
|
||
model: | ||
type: 'transformer' # Whether to use a forward transformer or autoregressive transformer model. | ||
# Choices: ['transformer', 'autoreg_transformer'] | ||
d_model: 512 | ||
d_fft: 1024 | ||
layers: 6 | ||
dropout: 0.1 | ||
heads: 4 | ||
|
||
training: | ||
|
||
# Hyperparams for learning rate and scheduler. | ||
# The scheduler is reducing the lr on plateau of phoneme error rate (tested every n_generate_steps). | ||
|
||
learning_rate: 0.0001 # Learning rate of Adam. | ||
warmup_steps: 10000 # Linear increase of the lr from zero to the given lr within the given number of steps. | ||
scheduler_plateau_factor: 0.5 # Factor to multiply learning rate on plateau. | ||
scheduler_plateau_patience: 10 # Number of text generations with no improvement to tolerate. | ||
batch_size: 32 # Training batch size. | ||
batch_size_val: 32 # Validation batch size. | ||
epochs: 500 # Number of epochs to train. | ||
generate_steps: 10000 # Interval of training steps to generate sample outputs. Also, at this step the phoneme and word | ||
# error rates are calculated for the scheduler. | ||
validate_steps: 10000 # Interval of training steps to validate the model | ||
# (for the autoregressive model this is teacher-forced). | ||
checkpoint_steps: 100000 # Interval of training steps to save the model. | ||
n_generate_samples: 10 # Number of result samples to show on tensorboard. | ||
store_phoneme_dict_in_model: true # Whether to store the raw phoneme dict in the model. | ||
# It will be loaded by the phonemizer object. | ||
ddp_backend: 'nccl' # Backend used by Torch DDP | ||
ddp_host: 'localhost' # Hostname used by Torch DDP | ||
ddp_post: '12355' # Port used by Torch DDP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters