Skip to content

Commit

Permalink
Merge pull request #20 from baxtree/development
Browse files Browse the repository at this point in the history
Prepare for release 0.0.4
  • Loading branch information
baxtree authored Feb 10, 2020
2 parents db5589b + 4b73e09 commit e7156a3
Show file tree
Hide file tree
Showing 16 changed files with 173 additions and 55 deletions.
35 changes: 19 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,25 @@ test-all: ## run tests on every Python version with tox
.venv/bin/tox

pydoc: clean-doc ## generate pydoc HTML documentation based on docstrings
python -m pydoc -w subaligner; mv subaligner.html docs/index.html
python -m pydoc -w subaligner.embedder; mv subaligner.embedder.html docs
python -m pydoc -w subaligner.media_helper; mv subaligner.media_helper.html docs
python -m pydoc -w subaligner.network; mv subaligner.network.html docs
python -m pydoc -w subaligner.predictor; mv subaligner.predictor.html docs
python -m pydoc -w subaligner.singleton; mv subaligner.singleton.html docs
python -m pydoc -w subaligner.trainer; mv subaligner.trainer.html docs
python -m pydoc -w subaligner.utils; mv subaligner.utils.html docs
python -m pydoc -w subaligner.subtitle; mv subaligner.subtitle.html docs
python -m pydoc -w subaligner.logger; mv subaligner.logger.html docs
python -m pydoc -w subaligner.exception; mv subaligner.exception.html docs
python -m pydoc -w subaligner.models; mv subaligner.models.html docs
python -m pydoc -w subaligner.models.training; mv subaligner.models.training.html docs
python -m pydoc -w subaligner.models.training.model; mv subaligner.models.training.model.html docs
python -m pydoc -w subaligner.models.training.weights; mv subaligner.models.training.weights.html docs
python -m pydoc -w subaligner._version; mv subaligner._version.html docs
if [ ! -e ".venv" ]; then pip3 install virtualenv; virtualenv -p python3 .venv; fi
.venv/bin/pip install --upgrade pip setuptools wheel; \
cat requirements.txt | xargs -L 1 .venv/bin/pip install; \
.venv/bin/python -m pydoc -w subaligner; mv subaligner.html docs/index.html
.venv/bin/python -m pydoc -w subaligner.embedder; mv subaligner.embedder.html docs
.venv/bin/python -m pydoc -w subaligner.media_helper; mv subaligner.media_helper.html docs
.venv/bin/python -m pydoc -w subaligner.network; mv subaligner.network.html docs
.venv/bin/python -m pydoc -w subaligner.predictor; mv subaligner.predictor.html docs
.venv/bin/python -m pydoc -w subaligner.singleton; mv subaligner.singleton.html docs
.venv/bin/python -m pydoc -w subaligner.trainer; mv subaligner.trainer.html docs
.venv/bin/python -m pydoc -w subaligner.utils; mv subaligner.utils.html docs
.venv/bin/python -m pydoc -w subaligner.subtitle; mv subaligner.subtitle.html docs
.venv/bin/python -m pydoc -w subaligner.logger; mv subaligner.logger.html docs
.venv/bin/python -m pydoc -w subaligner.exception; mv subaligner.exception.html docs
.venv/bin/python -m pydoc -w subaligner.models; mv subaligner.models.html docs
.venv/bin/python -m pydoc -w subaligner.models.training; mv subaligner.models.training.html docs
.venv/bin/python -m pydoc -w subaligner.models.training.model; mv subaligner.models.training.model.html docs
.venv/bin/python -m pydoc -w subaligner.models.training.weights; mv subaligner.models.training.weights.html docs
.venv/bin/python -m pydoc -w subaligner._version; mv subaligner._version.html docs
$(BROWSER) docs/index.html

coverage: ## check code coverage quickly with the default Python
Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="heading">
<tr bgcolor="#7799ee">
<td valign=bottom>&nbsp;<br>
<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong>subaligner</strong></big></big> (version 0.0.3)</font></td
<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong>subaligner</strong></big></big> (version 0.0.4)</font></td
><td align=right valign=bottom
><font color="#ffffff" face="helvetica, arial"><a href=".">index</a><br><a href="file:/Users/baix01/MyWorkspace/subaligner-github/subaligner/__init__.py">/Users/baix01/MyWorkspace/subaligner-github/subaligner/__init__.py</a></font></td></tr></table>
<p></p>
Expand Down
2 changes: 1 addition & 1 deletion docs/subaligner._version.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="heading">
<tr bgcolor="#7799ee">
<td valign=bottom>&nbsp;<br>
<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong><a href="subaligner.html"><font color="#ffffff">subaligner</font></a>._version</strong></big></big> (version 0.0.3)</font></td
<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong><a href="subaligner.html"><font color="#ffffff">subaligner</font></a>._version</strong></big></big> (version 0.0.4)</font></td
><td align=right valign=bottom
><font color="#ffffff" face="helvetica, arial"><a href=".">index</a><br><a href="file:/Users/baix01/MyWorkspace/subaligner-github/subaligner/_version.py">/Users/baix01/MyWorkspace/subaligner-github/subaligner/_version.py</a></font></td></tr></table>
<p><tt>The&nbsp;semver&nbsp;for&nbsp;the&nbsp;current&nbsp;release.</tt></p>
Expand Down
2 changes: 1 addition & 1 deletion docs/subaligner.embedder.html
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
&nbsp;&nbsp;&nbsp;&nbsp;n_mfcc&nbsp;{int}&nbsp;--&nbsp;The&nbsp;number&nbsp;of&nbsp;MFCC&nbsp;components&nbsp;(default:&nbsp;{13}).<br>
&nbsp;&nbsp;&nbsp;&nbsp;frequency&nbsp;{float}&nbsp;--&nbsp;The&nbsp;sample&nbsp;rate&nbsp;&nbsp;(default:&nbsp;{16000}).<br>
&nbsp;&nbsp;&nbsp;&nbsp;hop_len&nbsp;{int}&nbsp;--&nbsp;The&nbsp;number&nbsp;of&nbsp;samples&nbsp;per&nbsp;frame&nbsp;(default:&nbsp;{512}).<br>
&nbsp;&nbsp;&nbsp;&nbsp;step_sample&nbsp;{float}&nbsp;--&nbsp;The&nbsp;space&nbsp;(in&nbsp;seconds)&nbsp;between&nbsp;the&nbsp;begining&nbsp;of&nbsp;each&nbsp;sample&nbsp;(default:&nbsp;1s&nbsp;/&nbsp;25&nbsp;FPS&nbsp;=&nbsp;0.04s).<br>
&nbsp;&nbsp;&nbsp;&nbsp;step_sample&nbsp;{float}&nbsp;--&nbsp;The&nbsp;space&nbsp;(in&nbsp;seconds)&nbsp;between&nbsp;the&nbsp;beginning&nbsp;of&nbsp;each&nbsp;sample&nbsp;(default:&nbsp;1s&nbsp;/&nbsp;25&nbsp;FPS&nbsp;=&nbsp;0.04s).<br>
&nbsp;&nbsp;&nbsp;&nbsp;len_sample&nbsp;{float}&nbsp;--&nbsp;The&nbsp;length&nbsp;in&nbsp;seconds&nbsp;for&nbsp;the&nbsp;input&nbsp;samples&nbsp;(default:&nbsp;{0.075}).</tt></dd></dl>

<dl><dt><a name="FeatureEmbedder-extract_data_and_label_from_audio"><strong>extract_data_and_label_from_audio</strong></a>(self, audio_file_path, subtitle_file_path, subtitles=None, ignore_sound_effects=False)</dt><dd><tt>Generate&nbsp;a&nbsp;train&nbsp;dataset&nbsp;from&nbsp;an&nbsp;audio&nbsp;file&nbsp;and&nbsp;its&nbsp;subtitles<br>
Expand Down
10 changes: 9 additions & 1 deletion docs/subaligner.media_helper.html
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@
<td width="100%">Static methods defined here:<br>
<dl><dt><a name="MediaHelper-extract_audio"><strong>extract_audio</strong></a>(video_file_path, decompress=False, freq=16000)</dt><dd><tt>Extract&nbsp;audio&nbsp;track&nbsp;from&nbsp;the&nbsp;video&nbsp;file&nbsp;and&nbsp;save&nbsp;it&nbsp;to&nbsp;a&nbsp;WAV&nbsp;file.<br>
&nbsp;<br>
Keyword&nbsp;Arguments:<br>
Arguments:<br>
&nbsp;&nbsp;&nbsp;&nbsp;video_file_path&nbsp;{string}&nbsp;--&nbsp;The&nbsp;input&nbsp;video&nbsp;file&nbsp;path.<br>
Keyword&nbsp;Arguments:<br>
&nbsp;&nbsp;&nbsp;&nbsp;decompress&nbsp;{bool}&nbsp;--&nbsp;Extract&nbsp;WAV&nbsp;if&nbsp;True&nbsp;otherwise&nbsp;extract&nbsp;AAC&nbsp;(default:&nbsp;{False}).<br>
&nbsp;&nbsp;&nbsp;&nbsp;freq&nbsp;{int}&nbsp;--&nbsp;The&nbsp;audio&nbsp;sample&nbsp;frequency&nbsp;(default:&nbsp;{16000}).<br>
Returns:<br>
Expand Down Expand Up @@ -83,6 +84,13 @@
Returns:<br>
&nbsp;&nbsp;&nbsp;&nbsp;float&nbsp;--&nbsp;The&nbsp;duration&nbsp;in&nbsp;seconds.</tt></dd></dl>

<dl><dt><a name="MediaHelper-get_frame_rate"><strong>get_frame_rate</strong></a>(video_file_path)</dt><dd><tt>Extract&nbsp;the&nbsp;frame&nbsp;rate&nbsp;from&nbsp;the&nbsp;video&nbsp;file.<br>
&nbsp;<br>
Arguments:<br>
&nbsp;&nbsp;&nbsp;&nbsp;video_file_path&nbsp;{string}&nbsp;--&nbsp;The&nbsp;input&nbsp;video&nbsp;file&nbsp;path.<br>
Returns:<br>
&nbsp;&nbsp;&nbsp;&nbsp;float&nbsp;--&nbsp;The&nbsp;frame&nbsp;rate</tt></dd></dl>

<hr>
Data descriptors defined here:<br>
<dl><dt><strong>__dict__</strong></dt>
Expand Down
3 changes: 1 addition & 2 deletions docs/subaligner.network.html
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,11 @@
Returns:<br>
&nbsp;&nbsp;&nbsp;&nbsp;tuple&nbsp;--&nbsp;A&nbsp;tuple&nbsp;contains&nbsp;validation&nbsp;losses&nbsp;and&nbsp;validation&nbsp;accuracies.</tt></dd></dl>

<dl><dt><a name="Network-get_predictions"><strong>get_predictions</strong></a>(self, input_data, weights_filepath, verbose=1)</dt><dd><tt>Get&nbsp;a&nbsp;Numpy&nbsp;array&nbsp;of&nbsp;predictions.<br>
<dl><dt><a name="Network-get_predictions"><strong>get_predictions</strong></a>(self, input_data, weights_filepath)</dt><dd><tt>Get&nbsp;a&nbsp;Numpy&nbsp;array&nbsp;of&nbsp;predictions.<br>
&nbsp;<br>
Arguments:<br>
&nbsp;&nbsp;&nbsp;&nbsp;input_data&nbsp;{numpy.ndarray}&nbsp;--&nbsp;The&nbsp;input&nbsp;data,&nbsp;as&nbsp;a&nbsp;Numpy&nbsp;array.<br>
&nbsp;&nbsp;&nbsp;&nbsp;weights_filepath&nbsp;{string}&nbsp;--&nbsp;The&nbsp;weights&nbsp;file&nbsp;path.<br>
&nbsp;&nbsp;&nbsp;&nbsp;verbose&nbsp;{int}&nbsp;--&nbsp;The&nbsp;verbosity&nbsp;mode&nbsp;of&nbsp;logging,&nbsp;either&nbsp;0&nbsp;(succinct)&nbsp;or&nbsp;1&nbsp;(verbose).<br>
&nbsp;<br>
Returns:<br>
&nbsp;&nbsp;&nbsp;&nbsp;numpy.ndarray&nbsp;--&nbsp;The&nbsp;Numpy&nbsp;array&nbsp;of&nbsp;predictions.</tt></dd></dl>
Expand Down
12 changes: 5 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,15 @@ google-auth==1.8.1
google-auth-oauthlib==0.4.1
google-pasta==0.1.8
graphviz==0.8.3
grpcio==1.11.0
h5py==2.8.0
HeapDict==1.0.0
html5lib==0.9999999
idna==2.8
importlib-metadata==1.2.0
isort==4.3.4
joblib==0.11
Keras-Applications==1.0.7
Keras-Preprocessing==1.0.9
Keras-Applications>=1.0.8
Keras-Preprocessing>=1.0.9
kiwisolver==1.0.1
lazy-object-proxy==1.3.1
librosa==0.6.0
Expand All @@ -48,7 +47,6 @@ numpy>=1.14.1,<1.18.0
oauthlib==3.1.0
pbr==4.0.2
pluggy==0.13.1
protobuf==3.6.1
psutil==5.4.6
py==1.8.0
pyasn1==0.4.8
Expand All @@ -70,12 +68,12 @@ requests-oauthlib==1.3.0
resampy==0.2.0
rsa==4.0
scikit-learn==0.19.1
scipy==1.0.1
six==1.11.0
scipy>=1.0.1
six>=1.11.0
snakeviz==1.0.0
soupsieve==1.9
tblib==1.3.2
tensorflow>=1.12,<2.0
tensorflow>=1.12,<2.2
termcolor==1.1.0
toml==0.10.0
toolz==0.9.0
Expand Down
2 changes: 1 addition & 1 deletion subaligner/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""The semver for the current release."""
__version__ = "0.0.3"
__version__ = "0.0.4"
24 changes: 11 additions & 13 deletions subaligner/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@ def __init__(
n_mfcc {int} -- The number of MFCC components (default: {13}).
frequency {float} -- The sample rate (default: {16000}).
hop_len {int} -- The number of samples per frame (default: {512}).
step_sample {float} -- The space (in seconds) between the begining of each sample (default: 1s / 25 FPS = 0.04s).
step_sample {float} -- The space (in seconds) between the beginning of each sample (default: 1s / 25 FPS = 0.04s).
len_sample {float} -- The length in seconds for the input samples (default: {0.075}).
"""

self.__mfcc_extraction_lock = (
None
) # TODO: This lock is not in use any more
self.__n_mfcc = n_mfcc # number of MFCC components
self.__frequency = frequency # sample rate
self.__hop_len = hop_len # number of samples per frame
Expand Down Expand Up @@ -95,6 +92,16 @@ def step_sample(self):

return self.__step_sample

@step_sample.setter
def step_sample(self, step_sample):
    """Set the space (in seconds) between the beginning of each sample.

    Arguments:
        step_sample {float} -- The new value of the step sample (1 / frame_rate).
    """

    self.__step_sample = step_sample

@property
def len_sample(self):
"""Get the length in seconds for the input samples.
Expand Down Expand Up @@ -284,10 +291,6 @@ def extract_data_and_label_from_audio(
"{} sound effects removed".format(original_size - len(subs))
)

if self.__mfcc_extraction_lock is not None:
# Deprecated
# self.__mfcc_extraction_lock.acquire()
pass
t = datetime.now()

# Load audio file
Expand All @@ -312,11 +315,6 @@ def extract_data_and_label_from_audio(
n_mfcc=self.__n_mfcc,
)

if self.__mfcc_extraction_lock is not None:
# Deprecated
# self.__mfcc_extraction_lock.release()
pass

# Group multiple MFCCs of 32 ms into a larger range for LSTM
# and each stride will have an overlay with the previous one
samples = []
Expand Down
57 changes: 56 additions & 1 deletion subaligner/media_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ class MediaHelper(object):
def extract_audio(video_file_path, decompress=False, freq=16000):
"""Extract audio track from the video file and save it to a WAV file.
Keyword Arguments:
Arguments:
video_file_path {string} -- The input video file path.
Keyword Arguments:
decompress {bool} -- Extract WAV if True otherwise extract AAC (default: {False}).
freq {int} -- The audio sample frequency (default: {16000}).
Returns:
Expand Down Expand Up @@ -244,3 +245,57 @@ def get_audio_segment_starts_and_ends(subs):
new_subs.append(SubRipFile(combined))
del combined[:]
return segment_starts, segment_ends, new_subs

@staticmethod
def get_frame_rate(video_file_path):
    """Extract the frame rate from the video file.

    ffmpeg is run against the video and its stderr is piped through sed,
    which prints the token preceding " fp" (as in "25 fps") on each matching
    line. The first printed line is parsed as the frame rate.

    Arguments:
        video_file_path {string} -- The input video file path.

    Returns:
        float -- The frame rate.

    Raises:
        TerminalException -- Thrown when the pipeline times out, sed exits with
            a non-zero code, or no frame rate can be parsed from the output.
    """

    # Build the argument vector explicitly: splitting a formatted command
    # string on whitespace would break paths that contain spaces.
    with subprocess.Popen(
        ["ffmpeg", "-i", video_file_path, "-f", "null", "/dev/null"],
        shell=False,
        stderr=subprocess.PIPE,
    ) as proc:
        with subprocess.Popen(
            ['sed', '-n', "s/" + r".*, \(.*\) fp.*" + "/\\1/p"],
            shell=False,
            stdin=proc.stderr,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as process:
            try:
                std_out, _ = process.communicate(timeout=MediaHelper.__CMD_TIME_OUT)
                if process.returncode != 0:
                    raise TerminalException(
                        "Cannot extract the frame rate from video: {}".format(video_file_path)
                    )
                # An empty sed output (no frame rate reported) makes float() raise,
                # which is converted into a TerminalException below.
                fps = float(std_out.decode("utf-8").split("\n")[0])
            except subprocess.TimeoutExpired as te:
                proc.kill()
                proc.wait()
                process.kill()
                process.wait()
                raise TerminalException(
                    "Timeout on extracting the frame rate from video: {}".format(video_file_path)
                ) from te
            except Exception as e:
                proc.kill()
                proc.wait()
                process.kill()
                process.wait()
                if isinstance(e, TerminalException):
                    raise
                raise TerminalException(
                    "Cannot extract the frame rate from video: {}".format(video_file_path)
                ) from e
            finally:
                # NOTE(review): presumably restores terminal settings that ffmpeg may have altered - confirm.
                os.system("stty sane")

    MediaHelper.__LOGGER.info("Extracted frame rate:{} fps".format(fps))
    return fps
5 changes: 2 additions & 3 deletions subaligner/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,19 +270,18 @@ def layers(self):

return self.__model.layers

def get_predictions(self, input_data, weights_filepath, verbose=1):
def get_predictions(self, input_data, weights_filepath):
"""Get a Numpy array of predictions.
Arguments:
input_data {numpy.ndarray} -- The input data, as a Numpy array.
weights_filepath {string} -- The weights file path.
verbose {int} -- The verbosity mode of logging, either 0 (succinct) or 1 (verbose).
Returns:
numpy.ndarray -- The Numpy array of predictions.
"""
self.__model.load_weights(weights_filepath)
return self.__model.predict(input_data, verbose=verbose)
return self.__model.predict_on_batch(input_data)

def fit_and_get_history(
self,
Expand Down
22 changes: 18 additions & 4 deletions subaligner/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import multiprocessing as mp

from pysrt import SubRipTime
from sklearn.metrics import log_loss
from copy import deepcopy
from aeneas.executetask import ExecuteTask
Expand Down Expand Up @@ -74,6 +75,9 @@ def predict_single_pass(
except Exception:
raise
else:
frame_rate = MediaHelper.get_frame_rate(video_file_path)
self.__feature_embedder.step_sample = 1 / frame_rate
self.__on_frame_timecodes(subs)
return subs, audio_file_path, voice_probabilities
finally:
if os.path.exists(audio_file_path):
Expand Down Expand Up @@ -110,6 +114,9 @@ def predict_dual_pass(
except Exception:
raise
else:
frame_rate = MediaHelper.get_frame_rate(video_file_path)
self.__feature_embedder.step_sample = 1 / frame_rate
self.__on_frame_timecodes(new_subs)
return new_subs, subs, voice_probabilities
finally:
if os.path.exists(audio_file_path):
Expand Down Expand Up @@ -314,7 +321,7 @@ def __predict(
network = Network.get_from_model(model_path)
Predictor.__LOGGER.debug("Start predicting...")
pred_start = datetime.datetime.now()
voice_probabilities = network.get_predictions(train_data, weights_path, verbose=int(Logger.VERBOSE))
voice_probabilities = network.get_predictions(train_data, weights_path)
finally:
self.__lock.release()

Expand Down Expand Up @@ -357,9 +364,6 @@ def __predict(
os.remove(audio_file_path)
raise ValueError("Error: No subtitles passed in")

seconds_to_shift = Predictor.__normalise_seconds_to_shift(
seconds_to_shift, self.__feature_embedder.step_sample
)
if abs(seconds_to_shift) > Predictor.__MAX_SHIFT_IN_SECS:
if os.path.exists(audio_file_path):
os.remove(audio_file_path)
Expand Down Expand Up @@ -601,3 +605,13 @@ def __adjust_durations(self, subs, audio_file_path):
os.remove(task.text_file_path_absolute)
if task.sync_map_file_path_absolute is not None and os.path.exists(task.sync_map_file_path_absolute):
os.remove(task.sync_map_file_path_absolute)

def __on_frame_timecodes(self, subs):
    """Snap the start and end time codes of each subtitle onto frame boundaries in place.

    Only the millisecond part of a time code is adjusted: the start is moved up
    to the next frame boundary and the end is moved down to the previous one.
    Time codes already sitting on a frame boundary are left untouched. The
    frame length is derived from the feature embedder's step sample
    (in seconds).

    Arguments:
        subs {list} -- The subtitles whose "start" and "end" time codes will be overwritten.
    """
    import math

    # The frame length does not change while iterating, so compute it once.
    millis_per_frame = self.__feature_embedder.step_sample * 1000

    for sub in subs:
        start_parts = str(sub.start).split(",")
        end_parts = str(sub.end).split(",")

        # Use ceil/floor rather than round(x +/- 0.5): round() resolves ties to the
        # nearest even integer, which shifted time codes that were already aligned
        # with an odd-numbered frame by one whole frame.
        new_start_millis = math.ceil(int(start_parts[1]) / millis_per_frame) * millis_per_frame
        new_end_millis = math.floor(int(end_parts[1]) / millis_per_frame) * millis_per_frame

        new_start = start_parts[0] + "," + str(int(new_start_millis)).zfill(3)
        new_end = end_parts[0] + "," + str(int(new_end_millis)).zfill(3)
        sub.start = SubRipTime.coerce(new_start)
        sub.end = SubRipTime.coerce(new_end)
Loading

0 comments on commit e7156a3

Please sign in to comment.