---
# Example configuration for training VQ-APC.
# All hyper-parameters taken from the official implementation at
# https://github.com/iamyuanchung/VQ-APC/tree/96230cc358b174b736b4c0e7664b3e72b304d9b0
data:
  # Dataset-related settings
  dataset:
    name: 'LibriSpeech'  # Dataset to use; must match dataset/<corpus>.py
    path: '/path/to/LibriSpeech'  # Path to unzipped LibriSpeech dataset
    train_split: ['train-clean-360']  # Splits to be used as training set
    dev_split: ['dev-clean']  # Splits to be used as validation set
    batch_size: 32  # Batch size
    audio_max_frames: 1500  # Max spectrogram length, to ensure batch size
  # Attributes of the audio feature
  audio:
    feat_type: 'fbank'  # Feature type
    feat_dim: 80  # Feature dimension
    frame_length: 25  # Window size in ms
    frame_shift: 10  # Hop size in ms
    cmvn: true  # Apply utterance-wise CMVN on Mel spectrogram

model:
  method: 'vqapc'  # Accepts npc/apc/vqapc
  n_future: 5  # Target the next n-th frame in future for learning
  paras:
    num_layers: 3  # Number of GRU layers to be stacked in APC model
    hidden_size: 512  # Dimension of feature of all layers
    dropout: 0.1  # Dropout between GRU layers
    residual: true  # Residual connection between GRU layers
    # Remove the whole `vq` section to train plain APC.
    # NOTE(review): nesting restored from a flattened copy; `vq` is assumed
    # to be a child of `paras` - confirm against the model constructor.
    vq:
      codebook_size: [512]  # Codebook size of each group in VQ-layer
      code_dim: [512]  # Dim of each group, summing up to hidden_size
      gumbel_temperature: 0.5  # Temperature of Gumbel Softmax in VQ-layer

# Training hyper-parameters
hparas:
  optimizer: 'Adam'
  lr: 0.0001
  epoch: 100