Merge pull request #20 from baxtree/development

Prepare for release 0.0.4
baxtree · Feb 10, 2020 · e7156a3 · e7156a3
2 parents db5589b + 4b73e09
commit e7156a3
Show file tree

Hide file tree

Showing 16 changed files with 173 additions and 55 deletions.
diff --git a/Makefile b/Makefile
@@ -49,22 +49,25 @@ test-all: ## run tests on every Python version with tox
 	.venv/bin/tox
 
 pydoc: clean-doc ## generate pydoc HTML documentation based on docstrings
-	python -m pydoc -w subaligner; mv subaligner.html docs/index.html
-	python -m pydoc -w subaligner.embedder; mv subaligner.embedder.html docs
-	python -m pydoc -w subaligner.media_helper; mv subaligner.media_helper.html docs
-	python -m pydoc -w subaligner.network; mv subaligner.network.html docs
-	python -m pydoc -w subaligner.predictor; mv subaligner.predictor.html docs
-	python -m pydoc -w subaligner.singleton; mv subaligner.singleton.html docs
-	python -m pydoc -w subaligner.trainer; mv subaligner.trainer.html docs
-	python -m pydoc -w subaligner.utils; mv subaligner.utils.html docs
-	python -m pydoc -w subaligner.subtitle; mv subaligner.subtitle.html docs
-	python -m pydoc -w subaligner.logger; mv subaligner.logger.html docs
-	python -m pydoc -w subaligner.exception; mv subaligner.exception.html docs
-	python -m pydoc -w subaligner.models; mv subaligner.models.html docs
-	python -m pydoc -w subaligner.models.training; mv subaligner.models.training.html docs
-	python -m pydoc -w subaligner.models.training.model; mv subaligner.models.training.model.html docs
-	python -m pydoc -w subaligner.models.training.weights; mv subaligner.models.training.weights.html docs
-	python -m pydoc -w subaligner._version; mv subaligner._version.html docs
+	if [ ! -e ".venv" ]; then pip3 install virtualenv; virtualenv -p python3 .venv; fi
+	.venv/bin/pip install --upgrade pip setuptools wheel; \
+	cat requirements.txt | xargs -L 1 .venv/bin/pip install; \
+	.venv/bin/python -m pydoc -w subaligner; mv subaligner.html docs/index.html
+	.venv/bin/python -m pydoc -w subaligner.embedder; mv subaligner.embedder.html docs
+	.venv/bin/python -m pydoc -w subaligner.media_helper; mv subaligner.media_helper.html docs
+	.venv/bin/python -m pydoc -w subaligner.network; mv subaligner.network.html docs
+	.venv/bin/python -m pydoc -w subaligner.predictor; mv subaligner.predictor.html docs
+	.venv/bin/python -m pydoc -w subaligner.singleton; mv subaligner.singleton.html docs
+	.venv/bin/python -m pydoc -w subaligner.trainer; mv subaligner.trainer.html docs
+	.venv/bin/python -m pydoc -w subaligner.utils; mv subaligner.utils.html docs
+	.venv/bin/python -m pydoc -w subaligner.subtitle; mv subaligner.subtitle.html docs
+	.venv/bin/python -m pydoc -w subaligner.logger; mv subaligner.logger.html docs
+	.venv/bin/python -m pydoc -w subaligner.exception; mv subaligner.exception.html docs
+	.venv/bin/python -m pydoc -w subaligner.models; mv subaligner.models.html docs
+	.venv/bin/python -m pydoc -w subaligner.models.training; mv subaligner.models.training.html docs
+	.venv/bin/python -m pydoc -w subaligner.models.training.model; mv subaligner.models.training.model.html docs
+	.venv/bin/python -m pydoc -w subaligner.models.training.weights; mv subaligner.models.training.weights.html docs
+	.venv/bin/python -m pydoc -w subaligner._version; mv subaligner._version.html docs
 	$(BROWSER) docs/index.html
 
 coverage: ## check code coverage quickly with the default Python

diff --git a/docs/index.html b/docs/index.html
@@ -6,7 +6,7 @@
 <table width="100%" cellspacing=0 cellpadding=2 border=0 summary="heading">
 <tr bgcolor="#7799ee">
 <td valign=bottom>&nbsp;<br>
-<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong>subaligner</strong></big></big> (version 0.0.3)</font></td
+<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong>subaligner</strong></big></big> (version 0.0.4)</font></td
 ><td align=right valign=bottom
 ><font color="#ffffff" face="helvetica, arial"><a href=".">index</a><br><a href="file:/Users/baix01/MyWorkspace/subaligner-github/subaligner/__init__.py">/Users/baix01/MyWorkspace/subaligner-github/subaligner/__init__.py</a></font></td></tr></table>
     <p></p>

diff --git a/docs/subaligner._version.html b/docs/subaligner._version.html
@@ -6,7 +6,7 @@
 <table width="100%" cellspacing=0 cellpadding=2 border=0 summary="heading">
 <tr bgcolor="#7799ee">
 <td valign=bottom>&nbsp;<br>
-<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong><a href="subaligner.html"><font color="#ffffff">subaligner</font></a>._version</strong></big></big> (version 0.0.3)</font></td
+<font color="#ffffff" face="helvetica, arial">&nbsp;<br><big><big><strong><a href="subaligner.html"><font color="#ffffff">subaligner</font></a>._version</strong></big></big> (version 0.0.4)</font></td
 ><td align=right valign=bottom
 ><font color="#ffffff" face="helvetica, arial"><a href=".">index</a><br><a href="file:/Users/baix01/MyWorkspace/subaligner-github/subaligner/_version.py">/Users/baix01/MyWorkspace/subaligner-github/subaligner/_version.py</a></font></td></tr></table>
     <p><tt>The&nbsp;semver&nbsp;for&nbsp;the&nbsp;current&nbsp;release.</tt></p>

diff --git a/docs/subaligner.embedder.html b/docs/subaligner.embedder.html
@@ -57,7 +57,7 @@
 &nbsp;&nbsp;&nbsp;&nbsp;n_mfcc&nbsp;{int}&nbsp;--&nbsp;The&nbsp;number&nbsp;of&nbsp;MFCC&nbsp;components&nbsp;(default:&nbsp;{13}).<br>
 &nbsp;&nbsp;&nbsp;&nbsp;frequency&nbsp;{float}&nbsp;--&nbsp;The&nbsp;sample&nbsp;rate&nbsp;&nbsp;(default:&nbsp;{16000}).<br>
 &nbsp;&nbsp;&nbsp;&nbsp;hop_len&nbsp;{int}&nbsp;--&nbsp;The&nbsp;number&nbsp;of&nbsp;samples&nbsp;per&nbsp;frame&nbsp;(default:&nbsp;{512}).<br>
-&nbsp;&nbsp;&nbsp;&nbsp;step_sample&nbsp;{float}&nbsp;--&nbsp;The&nbsp;space&nbsp;(in&nbsp;seconds)&nbsp;between&nbsp;the&nbsp;begining&nbsp;of&nbsp;each&nbsp;sample&nbsp;(default:&nbsp;1s&nbsp;/&nbsp;25&nbsp;FPS&nbsp;=&nbsp;0.04s).<br>
+&nbsp;&nbsp;&nbsp;&nbsp;step_sample&nbsp;{float}&nbsp;--&nbsp;The&nbsp;space&nbsp;(in&nbsp;seconds)&nbsp;between&nbsp;the&nbsp;beginning&nbsp;of&nbsp;each&nbsp;sample&nbsp;(default:&nbsp;1s&nbsp;/&nbsp;25&nbsp;FPS&nbsp;=&nbsp;0.04s).<br>
 &nbsp;&nbsp;&nbsp;&nbsp;len_sample&nbsp;{float}&nbsp;--&nbsp;The&nbsp;length&nbsp;in&nbsp;seconds&nbsp;for&nbsp;the&nbsp;input&nbsp;samples&nbsp;(default:&nbsp;{0.075}).</tt></dd></dl>
 
 <dl><dt><a name="FeatureEmbedder-extract_data_and_label_from_audio"><strong>extract_data_and_label_from_audio</strong></a>(self, audio_file_path, subtitle_file_path, subtitles=None, ignore_sound_effects=False)</dt><dd><tt>Generate&nbsp;a&nbsp;train&nbsp;dataset&nbsp;from&nbsp;an&nbsp;audio&nbsp;file&nbsp;and&nbsp;its&nbsp;subtitles<br>

diff --git a/docs/subaligner.media_helper.html b/docs/subaligner.media_helper.html
@@ -47,8 +47,9 @@
 <td width="100%">Static methods defined here:<br>
 <dl><dt><a name="MediaHelper-extract_audio"><strong>extract_audio</strong></a>(video_file_path, decompress=False, freq=16000)</dt><dd><tt>Extract&nbsp;audio&nbsp;track&nbsp;from&nbsp;the&nbsp;video&nbsp;file&nbsp;and&nbsp;save&nbsp;it&nbsp;to&nbsp;a&nbsp;WAV&nbsp;file.<br>
 &nbsp;<br>
-Keyword&nbsp;Arguments:<br>
+Arguments:<br>
 &nbsp;&nbsp;&nbsp;&nbsp;video_file_path&nbsp;{string}&nbsp;--&nbsp;The&nbsp;input&nbsp;video&nbsp;file&nbsp;path.<br>
+Keyword&nbsp;Arguments:<br>
 &nbsp;&nbsp;&nbsp;&nbsp;decompress&nbsp;{bool}&nbsp;--&nbsp;Extract&nbsp;WAV&nbsp;if&nbsp;True&nbsp;otherwise&nbsp;extract&nbsp;AAC&nbsp;(default:&nbsp;{False}).<br>
 &nbsp;&nbsp;&nbsp;&nbsp;freq&nbsp;{int}&nbsp;--&nbsp;The&nbsp;audio&nbsp;sample&nbsp;frequency&nbsp;(default:&nbsp;{16000}).<br>
 Returns:<br>
@@ -83,6 +84,13 @@
 Returns:<br>
 &nbsp;&nbsp;&nbsp;&nbsp;float&nbsp;--&nbsp;The&nbsp;duration&nbsp;in&nbsp;seconds.</tt></dd></dl>
 
+<dl><dt><a name="MediaHelper-get_frame_rate"><strong>get_frame_rate</strong></a>(video_file_path)</dt><dd><tt>Extract&nbsp;the&nbsp;frame&nbsp;rate&nbsp;from&nbsp;the&nbsp;video&nbsp;file.<br>
+&nbsp;<br>
+Arguments:<br>
+&nbsp;&nbsp;&nbsp;&nbsp;video_file_path&nbsp;{string}&nbsp;--&nbsp;The&nbsp;input&nbsp;video&nbsp;file&nbsp;path.<br>
+Returns:<br>
+&nbsp;&nbsp;&nbsp;&nbsp;float&nbsp;--&nbsp;The&nbsp;frame&nbsp;rate&nbsp;in&nbsp;frames&nbsp;per&nbsp;second.</tt></dd></dl>
+
 <hr>
 Data descriptors defined here:<br>
 <dl><dt><strong>__dict__</strong></dt>

diff --git a/docs/subaligner.network.html b/docs/subaligner.network.html
@@ -87,12 +87,11 @@
 Returns:<br>
 &nbsp;&nbsp;&nbsp;&nbsp;tuple&nbsp;--&nbsp;A&nbsp;tuple&nbsp;contains&nbsp;validation&nbsp;losses&nbsp;and&nbsp;validation&nbsp;accuracies.</tt></dd></dl>
 
-<dl><dt><a name="Network-get_predictions"><strong>get_predictions</strong></a>(self, input_data, weights_filepath, verbose=1)</dt><dd><tt>Get&nbsp;a&nbsp;Numpy&nbsp;array&nbsp;of&nbsp;predictions.<br>
+<dl><dt><a name="Network-get_predictions"><strong>get_predictions</strong></a>(self, input_data, weights_filepath)</dt><dd><tt>Get&nbsp;a&nbsp;Numpy&nbsp;array&nbsp;of&nbsp;predictions.<br>
 &nbsp;<br>
 Arguments:<br>
 &nbsp;&nbsp;&nbsp;&nbsp;input_data&nbsp;{numpy.ndarray}&nbsp;--&nbsp;The&nbsp;input&nbsp;data,&nbsp;as&nbsp;a&nbsp;Numpy&nbsp;array.<br>
 &nbsp;&nbsp;&nbsp;&nbsp;weights_filepath&nbsp;{string}&nbsp;--&nbsp;The&nbsp;weights&nbsp;file&nbsp;path.<br>
-&nbsp;&nbsp;&nbsp;&nbsp;verbose&nbsp;{int}&nbsp;--&nbsp;The&nbsp;verbosity&nbsp;mode&nbsp;of&nbsp;logging,&nbsp;either&nbsp;0&nbsp;(succinct)&nbsp;or&nbsp;1&nbsp;(verbose).<br>
 &nbsp;<br>
 Returns:<br>
 &nbsp;&nbsp;&nbsp;&nbsp;numpy.ndarray&nbsp;--&nbsp;The&nbsp;Numpy&nbsp;array&nbsp;of&nbsp;predictions.</tt></dd></dl>

diff --git a/requirements.txt b/requirements.txt
@@ -23,16 +23,15 @@ google-auth==1.8.1
 google-auth-oauthlib==0.4.1
 google-pasta==0.1.8
 graphviz==0.8.3
-grpcio==1.11.0
 h5py==2.8.0
 HeapDict==1.0.0
 html5lib==0.9999999
 idna==2.8
 importlib-metadata==1.2.0
 isort==4.3.4
 joblib==0.11
-Keras-Applications==1.0.7
-Keras-Preprocessing==1.0.9
+Keras-Applications>=1.0.8
+Keras-Preprocessing>=1.0.9
 kiwisolver==1.0.1
 lazy-object-proxy==1.3.1
 librosa==0.6.0
@@ -48,7 +47,6 @@ numpy>=1.14.1,<1.18.0
 oauthlib==3.1.0
 pbr==4.0.2
 pluggy==0.13.1
-protobuf==3.6.1
 psutil==5.4.6
 py==1.8.0
 pyasn1==0.4.8
@@ -70,12 +68,12 @@ requests-oauthlib==1.3.0
 resampy==0.2.0
 rsa==4.0
 scikit-learn==0.19.1
-scipy==1.0.1
-six==1.11.0
+scipy>=1.0.1
+six>=1.11.0
 snakeviz==1.0.0
 soupsieve==1.9
 tblib==1.3.2
-tensorflow>=1.12,<2.0
+tensorflow>=1.12,<2.2
 termcolor==1.1.0
 toml==0.10.0
 toolz==0.9.0

diff --git a/subaligner/_version.py b/subaligner/_version.py
@@ -1,2 +1,2 @@
 """The semver for the current release."""
-__version__ = "0.0.3"
+__version__ = "0.0.4"
diff --git a/subaligner/embedder.py b/subaligner/embedder.py
@@ -28,13 +28,10 @@ def __init__(
             n_mfcc {int} -- The number of MFCC components (default: {13}).
             frequency {float} -- The sample rate  (default: {16000}).
             hop_len {int} -- The number of samples per frame (default: {512}).
-            step_sample {float} -- The space (in seconds) between the begining of each sample (default: 1s / 25 FPS = 0.04s).
+            step_sample {float} -- The space (in seconds) between the beginning of each sample (default: 1s / 25 FPS = 0.04s).
             len_sample {float} -- The length in seconds for the input samples (default: {0.075}).
         """
 
-        self.__mfcc_extraction_lock = (
-            None
-        )  # TODO: This lock is not in use any more
         self.__n_mfcc = n_mfcc  # number of MFCC components
         self.__frequency = frequency  # sample rate
         self.__hop_len = hop_len  # number of samples per frame
@@ -95,6 +92,16 @@ def step_sample(self):
 
         return self.__step_sample
 
+    @ step_sample.setter
+    def step_sample(self, step_sample):
+        """Set the space (in seconds) between the beginning of each sample.
+
+        Arguments:
+            step_sample {float} -- the value of the step sample (1 / frame_rate)
+        """
+
+        self.__step_sample = step_sample
+
     @property
     def len_sample(self):
         """Get the length in seconds for the input samples.
@@ -284,10 +291,6 @@ def extract_data_and_label_from_audio(
                 "{} sound effects removed".format(original_size - len(subs))
             )
 
-        if self.__mfcc_extraction_lock is not None:
-            # Deprecated
-            # self.__mfcc_extraction_lock.acquire()
-            pass
         t = datetime.now()
 
         # Load audio file
@@ -312,11 +315,6 @@ def extract_data_and_label_from_audio(
             n_mfcc=self.__n_mfcc,
         )
 
-        if self.__mfcc_extraction_lock is not None:
-            # Deprecated
-            # self.__mfcc_extraction_lock.release()
-            pass
-
         # Group multiple MFCCs of 32 ms into a larger range for LSTM
         # and each stride will have an overlay with the previous one
         samples = []

diff --git a/subaligner/media_helper.py b/subaligner/media_helper.py
@@ -26,8 +26,9 @@ class MediaHelper(object):
     def extract_audio(video_file_path, decompress=False, freq=16000):
         """Extract audio track from the video file and save it to a WAV file.
 
-        Keyword Arguments:
+        Arguments:
             video_file_path {string} -- The input video file path.
+        Keyword Arguments:
             decompress {bool} -- Extract WAV if True otherwise extract AAC (default: {False}).
             freq {int} -- The audio sample frequency (default: {16000}).
         Returns:
@@ -244,3 +245,57 @@ def get_audio_segment_starts_and_ends(subs):
                 new_subs.append(SubRipFile(combined))
                 del combined[:]
         return segment_starts, segment_ends, new_subs
+
+    @staticmethod
+    def get_frame_rate(video_file_path):
+        """Extract the frame rate from the video file.
+
+        Arguments:
+            video_file_path {string} -- The input video file path.
+        Returns:
+            float -- The frame rate in frames per second.
+        """
+
+        with subprocess.Popen(
+                "ffmpeg -i {} -f null /dev/null".format(video_file_path).split(),
+                shell=False,
+                stderr=subprocess.PIPE,
+        ) as proc:
+            with subprocess.Popen(
+                    ['sed', '-n', "s/" + r".*, \(.*\) fp.*" + "/\\1/p"],
+                    shell=False,
+                    stdin=proc.stderr,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+            ) as process:
+                try:
+                    std_out, std_err = process.communicate(timeout=MediaHelper.__CMD_TIME_OUT)
+                    if process.returncode != 0:
+                        raise TerminalException(
+                            "Cannot extract the frame rate from video: {}".format(video_file_path)
+                        )
+                    fps = float(std_out.decode("utf-8").split("\n")[0])
+                except subprocess.TimeoutExpired as te:
+                    proc.kill()
+                    proc.wait()
+                    process.kill()
+                    process.wait()
+                    raise TerminalException(
+                        "Timeout on extracting the frame rate from video: {}".format(video_file_path)
+                    ) from te
+                except Exception as e:
+                    proc.kill()
+                    proc.wait()
+                    process.kill()
+                    process.wait()
+                    if isinstance(e, TerminalException):
+                        raise e
+                    else:
+                        raise TerminalException(
+                            "Cannot extract the frame rate from video: {}".format(video_file_path)
+                        ) from e
+                finally:
+                    os.system("stty sane")
+
+        MediaHelper.__LOGGER.info("Extracted frame rate:{} fps".format(fps))
+        return fps
diff --git a/subaligner/network.py b/subaligner/network.py
@@ -270,19 +270,18 @@ def layers(self):
 
         return self.__model.layers
 
-    def get_predictions(self, input_data, weights_filepath, verbose=1):
+    def get_predictions(self, input_data, weights_filepath):
         """Get a Numpy array of predictions.
 
         Arguments:
             input_data {numpy.ndarray} -- The input data, as a Numpy array.
             weights_filepath {string} -- The weights file path.
-            verbose {int} -- The verbosity mode of logging, either 0 (succinct) or 1 (verbose).
 
         Returns:
             numpy.ndarray -- The Numpy array of predictions.
         """
         self.__model.load_weights(weights_filepath)
-        return self.__model.predict(input_data, verbose=verbose)
+        return self.__model.predict_on_batch(input_data)
 
     def fit_and_get_history(
         self,

diff --git a/subaligner/predictor.py b/subaligner/predictor.py
@@ -6,6 +6,7 @@
 import numpy as np
 import multiprocessing as mp
 
+from pysrt import SubRipTime
 from sklearn.metrics import log_loss
 from copy import deepcopy
 from aeneas.executetask import ExecuteTask
@@ -74,6 +75,9 @@ def predict_single_pass(
         except Exception:
             raise
         else:
+            frame_rate = MediaHelper.get_frame_rate(video_file_path)
+            self.__feature_embedder.step_sample = 1 / frame_rate
+            self.__on_frame_timecodes(subs)
             return subs, audio_file_path, voice_probabilities
         finally:
             if os.path.exists(audio_file_path):
@@ -110,6 +114,9 @@ def predict_dual_pass(
         except Exception:
             raise
         else:
+            frame_rate = MediaHelper.get_frame_rate(video_file_path)
+            self.__feature_embedder.step_sample = 1 / frame_rate
+            self.__on_frame_timecodes(new_subs)
             return new_subs, subs, voice_probabilities
         finally:
             if os.path.exists(audio_file_path):
@@ -314,7 +321,7 @@ def __predict(
             network = Network.get_from_model(model_path)
             Predictor.__LOGGER.debug("Start predicting...")
             pred_start = datetime.datetime.now()
-            voice_probabilities = network.get_predictions(train_data, weights_path, verbose=int(Logger.VERBOSE))
+            voice_probabilities = network.get_predictions(train_data, weights_path)
         finally:
             self.__lock.release()
 
@@ -357,9 +364,6 @@ def __predict(
                 os.remove(audio_file_path)
             raise ValueError("Error: No subtitles passed in")
 
-        seconds_to_shift = Predictor.__normalise_seconds_to_shift(
-            seconds_to_shift, self.__feature_embedder.step_sample
-        )
         if abs(seconds_to_shift) > Predictor.__MAX_SHIFT_IN_SECS:
             if os.path.exists(audio_file_path):
                 os.remove(audio_file_path)
@@ -601,3 +605,13 @@ def __adjust_durations(self, subs, audio_file_path):
                 os.remove(task.text_file_path_absolute)
             if task.sync_map_file_path_absolute is not None and os.path.exists(task.sync_map_file_path_absolute):
                 os.remove(task.sync_map_file_path_absolute)
+
+    def __on_frame_timecodes(self, subs):
+        for sub in subs:
+            millis_per_frame = self.__feature_embedder.step_sample * 1000
+            new_start_millis = round(int(str(sub.start).split(",")[1]) / millis_per_frame + 0.5) * millis_per_frame
+            new_start = str(sub.start).split(",")[0] + "," + str(int(new_start_millis)).zfill(3)
+            new_end_millis = round(int(str(sub.end).split(",")[1]) / millis_per_frame - 0.5) * millis_per_frame
+            new_end = str(sub.end).split(",")[0] + "," + str(int(new_end_millis)).zfill(3)
+            sub.start = SubRipTime.coerce(new_start)
+            sub.end = SubRipTime.coerce(new_end)