Skip to content

Commit

Permalink
address #33
Browse files (browse the repository at this point in the history)
  • Loading branch information
lucidrains committed Sep 9, 2024
1 parent f562ce6 commit 703b41a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
21 changes: 15 additions & 6 deletions e2_tts_pytorch/e2_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,28 @@ def list_str_to_tensor(
def get_g2p_en_encode():
g2p = G2p()

# used by @lucasnewman successfully here
# https://github.com/lucasnewman/e2-tts-pytorch/blob/ljspeech-test/e2_tts_pytorch/e2_tts.py

phoneme_to_index = g2p.p2idx
num_phonemes = len(phoneme_to_index)

extended_chars = [' ', ',', '.', '-', '!', '?', '\'', '"', '...', '..', '. .', '. . .', '. . . .', '. . . . .', '. ...', '... .', '.. ..']
num_extended_chars = len(extended_chars)

extended_chars_dict = {p: (num_phonemes + i) for i, p in enumerate(extended_chars)}

def encode(
text: list[str],
padding_value = -1
) -> Int['b nt']:

phonemes = [g2p(t) for t in text]
list_tensors = [tensor([g2p.p2idx[p] for p in one_phoneme]) for one_phoneme in phonemes]
list_tensors = [tensor([phoneme_to_index[p] for p in one_phoneme]) for one_phoneme in phonemes]
padded_tensor = pad_sequence(list_tensors, padding_value = -1)
return padded_tensor

return encode
return encode, (num_phonemes + num_extended_chars)

# tensor helpers

Expand Down Expand Up @@ -612,8 +623,7 @@ def __init__(
text_num_embeds = 256
self.tokenizer = list_str_to_tensor
elif tokenizer == 'phoneme_en':
text_num_embeds = 74
self.tokenizer = get_g2p_en_encode()
self.tokenizer, text_num_embeds = get_g2p_en_encode()
else:
raise ValueError(f'unknown tokenizer string {tokenizer}')

Expand Down Expand Up @@ -776,8 +786,7 @@ def __init__(
text_num_embeds = 256
self.tokenizer = list_str_to_tensor
elif tokenizer == 'phoneme_en':
text_num_embeds = 74
self.tokenizer = get_g2p_en_encode()
self.tokenizer, text_num_embeds = get_g2p_en_encode()
else:
raise ValueError(f'unknown tokenizer string {tokenizer}')

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "e2-tts-pytorch"
version = "0.9.4"
version = "0.9.5"
description = "E2-TTS in Pytorch"
authors = [
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
Expand Down

0 comments on commit 703b41a

Please sign in to comment.