-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyproject.toml
60 lines (54 loc) · 1.37 KB
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Package metadata for the Poetry-managed project.
[tool.poetry]
name = "stepwise_tacotron"
version = "0.1.1"
description = "Tacotron with stepwise monotonic attention"
authors = ["Thông Nguyễn"]
license = "MIT"
# Runtime requirements. The interpreter constraint comes first; the packages
# after it are listed in case-insensitive alphabetical order.
[tool.poetry.dependencies]
python = ">=3.7,<3.11"
fire = "^0.4.0"
# Declared optional; no extras group referencing it is visible in this file.
# NOTE(review): confirm how jax is expected to be installed.
jax = { version = "^0.3.0", optional = true }
librosa = "^0.9.1"
matplotlib = "^3.5.1"
opax = "^0.2.8"
pax3 = "^0.5.5"
pooch = "^1.6.0"
soxr = "^0.2.7"
tensorflow-cpu = "^2.8.0"
toml = "^0.10.2"
tqdm = "^4.62.3"
Unidecode = "^1.3.2"
# Build configuration: `requires` lists what must be installed to build the
# project, and `build-backend` names the object that performs the build.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
# Training, data, and model settings grouped under the `tacotron` table.
# NOTE(review): this table sits at the document root rather than under
# [tool.*]; presumably the loader reads the top-level "tacotron" key, so
# confirm against the consuming code before relocating it.
[tacotron]
# training
TRAINING_STEPS = 500000
BATCH_SIZE = 64
LR = 1024e-6 # learning rate (i.e. 0.001024)
MODEL_PREFIX = "mono_tts_cbhg_small"
LOG_DIR = "./logs"
CKPT_DIR = "./ckpts"
USE_MP = false # use mixed-precision training
STEPS_PER_CALL = 10
TEST_DATA_SIZE = 100

# data
TF_DATA_DIR = "./tf_data" # tensorflow data directory
TF_GTA_DATA_DIR = "./tf_gta_data" # tf gta data directory
SAMPLE_RATE = 24000 # convert to this sample rate if needed
MEL_DIM = 80 # the dimension of melspectrogram features
MEL_MIN = 1e-5
PAD = "_" # padding character
PAD_TOKEN = 0
END_CHARACTER = "■" # to signal the end of the transcript

# model
RR = 1 # reduction factor
MAX_RR = 2
ATTN_BIAS = 0.0 # control how slow the attention moves forward
SIGMOID_NOISE = 2.0
PRENET_DIM = 128
TEXT_DIM = 256
RNN_DIM = 512
ATTN_RNN_DIM = 256
ATTN_HIDDEN_DIM = 128
POSTNET_DIM = 512