Experimental serving
* Does not serve anything right now.
* One could upgrade this to receive sentences from POST requests
  (a hedged sketch of such an upgrade follows the diff below).
  However, this code is primarily meant as a proof of concept.
yweweler committed Nov 16, 2018
1 parent 412149a commit 8ee45bb
Showing 2 changed files with 180 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tacotron/params/serving.py
@@ -0,0 +1,11 @@
import tensorflow as tf

# Default hyper-parameters:
serving_params = tf.contrib.training.HParams(
    # Directory to load the exported model from.
    export_dir='/tmp/export/ljspeech',

    # Version number of the exported model to be loaded.
    export_version=1,
)
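
Since serving_params is a tf.contrib.training.HParams object, the defaults above can be overridden at run time via HParams.parse instead of editing the file. A minimal sketch; the path and version below are made up for illustration:

# Hypothetical override; values are for illustration only.
serving_params.parse('export_dir=/tmp/export/other_model,export_version=2')
print(serving_params.export_dir)      # -> /tmp/export/other_model
print(serving_params.export_version)  # -> 2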
169 changes: 169 additions & 0 deletions tacotron/serve.py
@@ -0,0 +1,169 @@
from multiprocessing.pool import ThreadPool

import numpy as np
import tensorflow as tf

from audio.conversion import inv_normalize_decibel, decibel_to_magnitude, ms_to_samples
from audio.synthesis import spectrogram_to_wav
from tacotron.params.dataset import dataset_params
from tacotron.params.inference import inference_params
from tacotron.params.model import model_params
from tacotron.params.serving import serving_params


def pad_sentence(_sentence, _max_len):
    pad_len = _max_len - len(_sentence)
    pad_token = dataset_params.vocabulary_dict['pad']
    _sentence = np.append(_sentence, [pad_token] * pad_len)

    return _sentence
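
For illustration, assuming the vocabulary maps 'pad' to id 0 (an assumption; the actual id depends on dataset_params), the function pads an id sequence up to the target length:

# Hypothetical example, assuming dataset_params.vocabulary_dict['pad'] == 0:
# pad_sentence(np.array([7, 3, 5], dtype=np.int32), 5)
# -> array([7, 3, 5, 0, 0])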


def pre_process_sentences(_sentences, dataset):
    # Pre-process the sentences and convert them into id sequences.
    id_sequences, sequence_lengths = dataset.process_sentences(_sentences)

    # Decode each id sequence from its byte-string representation.
    sentences = [np.fromstring(id_sequence, dtype=np.int32) for id_sequence in id_sequences]

    # Pad the sentences to the same length in order to be able to batch them in a single tensor.
    max_length = max(sequence_lengths)
    sentences = np.array([pad_sentence(sentence, max_length) for sentence in sentences])

    print('sentences', sentences)
    print('sentences.shape', sentences.shape)

    return sentences


def post_process_spectrograms(_spectrograms):
    # Revert the decibel normalization of each spectrogram so that
    # Griffin-Lim can be applied to the magnitudes afterwards.
    normalized = list()
    for spectrogram in _spectrograms:
        print('Reverse spectrogram normalization ...', spectrogram.shape)
        linear_mag_db = inv_normalize_decibel(spectrogram.T,
                                              dataset_params.dataset_loader.mel_mag_ref_db,
                                              dataset_params.dataset_loader.mel_mag_max_db)

        linear_mag = decibel_to_magnitude(linear_mag_db)
        normalized.append(linear_mag)

    specs = normalized

    win_len = ms_to_samples(model_params.win_len, model_params.sampling_rate)
    win_hop = ms_to_samples(model_params.win_hop, model_params.sampling_rate)
    n_fft = model_params.n_fft

    def synthesize(linear_mag):
        # Drop a trailing singleton channel axis, should the model emit one.
        if linear_mag.shape[-1] == 1:
            linear_mag = np.squeeze(linear_mag, -1)
        linear_mag = np.power(linear_mag, model_params.magnitude_power)

        print('Spectrogram inversion ...')
        return spectrogram_to_wav(linear_mag,
                                  win_len,
                                  win_hop,
                                  n_fft,
                                  model_params.reconstruction_iterations)

    # Apply Griffin-Lim to all spectrograms to get the waveforms.
    pool = ThreadPool(inference_params.n_synthesis_threads)
    wavs = pool.map(synthesize, specs)
    pool.close()
    pool.join()

    # # Write all generated waveforms to disk.
    # for i, (sentence, wav) in enumerate(zip(raw_sentences, wavs)):
    #     # Append ".wav" to the sentence line number to get the filename.
    #     file_name = '{}.wav'.format(i + 1)
    #
    #     # Generate the full path under which to save the wav.
    #     save_path = os.path.join(inference_params.synthesis_dir, file_name)
    #
    #     # Write the wav to disk.
    #     # save_wav(save_path, wav, model_params.sampling_rate, True)
    #     print('Saved: "{}"'.format(save_path))

    return wavs


def serve(sentence_generator):
    # Create a dataset loader.
    dataset = dataset_params.dataset_loader(dataset_folder=dataset_params.dataset_folder,
                                            char_dict=dataset_params.vocabulary_dict,
                                            fill_dict=False)

    graph = tf.Graph()
    # Start a session for serving.
    with start_session(graph=graph) as session:
        # Load the exported model into the current session for serving.
        tf.saved_model.loader.load(session,
                                   [tf.saved_model.tag_constants.SERVING],
                                   serving_params.export_dir)

        # Get a handle to the tensor that is fed with the sentences.
        ph_inp_sentences = graph.get_tensor_by_name('ph_inp_sentences:0')

        # The linear spectrogram tensor used as the model's output is produced by a
        # dense layer. As such it has no direct name in the exported model (or I am
        # getting something wrong). Hence, the resulting tensor has to be addressed
        # by the bias-add operation of the dense layer. Wrapping the output in
        # tf.identity(..., name=...) at export time would give it a stable name.
        output_linear_spec = graph.get_tensor_by_name('dense/BiasAdd:0')

        # Block until the sentence generator provides a new set of sentences.
        for raw_sentences in sentence_generator:
            batched_sentences = pre_process_sentences(raw_sentences, dataset)

            # Fetch the output tensor directly (not wrapped in a list) so that
            # `spectrograms` is the batched array and post-processing iterates
            # over the individual sentence spectrograms.
            spectrograms = session.run(output_linear_spec,
                                       feed_dict={
                                           ph_inp_sentences: batched_sentences
                                       })

            wavs = post_process_spectrograms(spectrograms)
            print('Generated {} waveforms in total.'.format(len(wavs)))


def start_session(graph=None):
"""
Creates a session that can be used for training.
Arguments:
graph (tf.Graph):
Default graph to set when creating the session.
Default is None.
Returns:
tf.Session
"""

session_config = tf.ConfigProto(
gpu_options=tf.GPUOptions(
allow_growth=True,
)
)

session = tf.Session(
config=session_config,
graph=graph
)

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
session.run(init_op)

return session


if __name__ == '__main__':
    def sentence_generator():
        raw_sentences = [
            'Tis a test!',
            'Sentences could be entered using POST requests in the future.'
        ]
        while True:
            yield raw_sentences

    serve(sentence_generator())
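
The commit message suggests upgrading this to receive sentences from POST requests. As a hedged sketch only (none of the names below exist in the repository), a stdlib-only generator that feeds serve() from an HTTP endpoint could look like this:

import json
import queue
from http.server import BaseHTTPRequestHandler, HTTPServer
from threading import Thread

# Queue shared between the HTTP handler and the sentence generator.
sentence_queue = queue.Queue()


class SentenceHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        # Read the request body and parse it as a JSON list of sentences.
        length = int(self.headers.get('Content-Length', 0))
        sentences = json.loads(self.rfile.read(length).decode('utf-8'))

        # Hand the batch over to the serving loop and acknowledge the request.
        sentence_queue.put(sentences)
        self.send_response(202)
        self.end_headers()


def http_sentence_generator(host='0.0.0.0', port=8000):
    # Run the HTTP server on a background thread so the serving
    # loop can block on the queue in the foreground.
    server = HTTPServer((host, port), SentenceHandler)
    Thread(target=server.serve_forever, daemon=True).start()

    while True:
        # Block until a POST request delivers a new batch of sentences.
        yield sentence_queue.get()


# serve(http_sentence_generator())

A client could then submit sentences with, for example:
curl -X POST -d '["Hello world."]' http://localhost:8000/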
