Separate GPU-accelerated steps into the GPU workload image.
HanFa committed Feb 11, 2025
1 parent 79acf1d commit 8494611
Showing 32 changed files with 521 additions and 578 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/docker-build.yaml
@@ -15,9 +15,17 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build
- name: Build EasyVideoTrans service
uses: docker/build-push-action@v2
with:
context: .
push: false
tags: hanfa/pytvzhen-web:${{github.event.pull_request.number}}
tags: hanfa/easyvideotrans:${{github.event.pull_request.number}}

- name: Build EasyVideoTrans workloads
uses: docker/build-push-action@v2
with:
context: .
file: Dockerfile-gpu-workload
push: false
tags: hanfa/easyvideotrans-workloads:${{github.event.pull_request.number}}
4 changes: 2 additions & 2 deletions .github/workflows/docker-release.yaml
@@ -1,4 +1,4 @@
name: Pytvzhen-web Docker Image Release
name: EasyVideoTrans Service Docker Image Release

on:
workflow_run:
@@ -29,4 +29,4 @@ jobs:
with:
context: .
push: true
tags: hanfa/pytvzhen-web:latest
tags: hanfa/easyvideotrans:latest
32 changes: 32 additions & 0 deletions .github/workflows/docker-workload-release.yaml
@@ -0,0 +1,32 @@
name: EasyVideoTrans Workloads Docker Image Release

on:
workflow_run:
workflows: [ "Pytvzhen-web application test" ]
branches: [ "master" ]
types:
- completed

jobs:
build:
runs-on: self-hosted

steps:
- name: Check out code
uses: actions/checkout@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v2
with:
context: .
push: true
tags: hanfa/easyvideotrans-workloads:latest
2 changes: 2 additions & 0 deletions .gitignore
@@ -27,3 +27,5 @@ output/
!celery_results/*

.DS_Store

.pytest_cache
2 changes: 1 addition & 1 deletion Dockerfile
@@ -15,7 +15,7 @@ COPY requirements.txt .

# Install dependencies
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install --default-timeout=200 -r requirements.txt


FROM base AS final
24 changes: 24 additions & 0 deletions Dockerfile-gpu-workload
@@ -0,0 +1,24 @@
FROM python:3.9-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

RUN apt-get update && apt-get install -y \
ffmpeg \
git \
&& rm -rf /var/lib/apt/lists/*


COPY workloads/requirements.txt /app/

RUN pip install --no-cache-dir -r requirements.txt

COPY workloads /app/workloads/
COPY src /app/src/
COPY inference.py /app

EXPOSE 8199

CMD ["python", "inference.py"]
25 changes: 13 additions & 12 deletions app.py
@@ -4,11 +4,10 @@
import zipfile
import shutil
import uuid
from src.service.audio_processing.audio_remove import audio_remove
from src.service.audio_processing.transcribe_audio import transcribe_audio_en
from src.service.audio_processing.voice_connect import connect_voice
from src.service.translation import get_translator, srt_sentense_merge
from src.service.video_synthesis.voice_connect import connect_voice
from src.service.translation import get_translator
from src.service.tts import get_tts_client
from src.workload_client import EasyVideoTransWorkloadClient
from src.task_manager.celery_tasks.tasks import video_preview_task
from src.task_manager.celery_tasks.celery_utils import get_queue_length
from werkzeug.utils import secure_filename
Expand All @@ -19,16 +18,23 @@
from prometheus_flask_exporter import PrometheusMetrics

app = Flask(__name__, template_folder="./appendix/templates", static_folder="./appendix/static")
app.config.from_file("./configs/pytvzhen.json", load=json.load)
app.config.from_file("./configs/easyvideotrans.json", load=json.load)
metrics = PrometheusMetrics(app)
metrics.info('pytvzhen_web', 'Pytvzhen backend API', version='1.0.0')

PYTVZHEN_STAGE = 'PYTVZHEN_STAGE'
pytvzhen_api_request_counter = metrics.counter(
'pytvzhen_api_request_counter', 'Request count by request paths',
labels={'base_url': lambda: url_rule_to_base(request.url_rule), 'stage': lambda: pytvzhen_stage(),
'method': lambda: request.method, 'status': lambda r: r.status_code}
)

# Setup workloads client to submit any GPU workloads to EasyVideoTrans compute backend
gpu_workload = EasyVideoTransWorkloadClient(
audio_separation_endpoint=app.config['VOICE_BACKGROUND_SEPARATION_ENDPOINT'],
audio_transcribe_endpoint=app.config['AUDIO_TRANSCRIBE_ENDPOINT'],
)


def pytvzhen_stage():
return os.environ[PYTVZHEN_STAGE] if PYTVZHEN_STAGE in os.environ else 'default'
@@ -283,9 +289,7 @@ def remove_audio_bg(video_id):
f'not found at {output_path}, please extract it first')}), 404

try:
baseline_path = app.config['REMOVE_BACKGROUND_MUSIC_BASELINE_MODEL_PATH']
audio_remove(audio_path, audio_no_bg_path, audio_bg_fn_path, baseline_path,
app.config['REMOVE_BACKGROUND_MUSIC_TORCH_DEVICE'])
audio_bg_fn_path, audio_no_bg_fn = gpu_workload.separate_audio(audio_fn)
return jsonify({"message": log_info_return_str(
f"Remove remove background music for {audio_fn} as {audio_no_bg_fn} and {audio_bg_fn_path} successfully."),
"video_id": video_id}), 200
@@ -351,10 +355,7 @@ def transcribe(video_id):
f'not found at {audio_no_bg_path}, please extract it first')}), 404

try:
transcribe_audio_en(app.logger, path=audio_no_bg_path, modelName=transcribe_model, language="en",
srtFilePathAndName=en_srt_path)
srt_sentense_merge(app.logger, en_srt_path, en_srt_merged_path)

gpu_workload.transcribe_audio(audio_no_bg_path, [en_srt_path, en_srt_merged_path])
return jsonify({"message": log_info_return_str(
f"Transcribed SRT from {audio_no_bg_fn} as {en_srt_fn} and {en_srt_merged_fn} successfully."),
"video_id": video_id}), 200
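The diff does not include src/workload_client.py itself. Inferred from the /audio-sep and /audio-transcribe handlers in inference.py below, a minimal sketch of the client might look like the following; the class and method names come from app.py above, but everything else is an assumption, not the committed implementation.

# Hypothetical sketch of src/workload_client.py, inferred from the
# /audio-sep and /audio-transcribe routes in inference.py; not the
# committed implementation.
import requests


class EasyVideoTransWorkloadClient:
    def __init__(self, audio_separation_endpoint, audio_transcribe_endpoint):
        self.audio_separation_endpoint = audio_separation_endpoint
        self.audio_transcribe_endpoint = audio_transcribe_endpoint

    def separate_audio(self, file_name):
        # POST {"file_name": ...}; the workload returns the background and
        # vocal file names in "files" (see the /audio-sep handler below).
        resp = requests.post(self.audio_separation_endpoint,
                             json={"file_name": file_name})
        resp.raise_for_status()
        background_fn, voice_fn = resp.json()["files"]
        return background_fn, voice_fn

    def transcribe_audio(self, file_name, output_filenames):
        # POST the input file name plus the two SRT output names,
        # matching the /audio-transcribe handler's JSON contract.
        resp = requests.post(self.audio_transcribe_endpoint,
                             json={"file_name": file_name,
                                   "output_filenames": output_filenames})
        resp.raise_for_status()
        return resp.json()
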
6 changes: 6 additions & 0 deletions configs/easyvideotrans.json
@@ -0,0 +1,6 @@
{
"OUTPUT_PATH": "./output",
"VIDEO_MAX_DURATION": 3610,
"VOICE_BACKGROUND_SEPARATION_ENDPOINT": "http://localhost:8199/audio-sep",
"AUDIO_TRANSCRIBE_ENDPOINT": "http://localhost:8199/audio-transcribe"
}
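
Both endpoints target the GPU workload service defined in inference.py below, which listens on port 8199; the web service submits GPU work to it over HTTP instead of running the models in-process.
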
6 changes: 0 additions & 6 deletions configs/pytvzhen.json

This file was deleted.

195 changes: 195 additions & 0 deletions inference.py
@@ -0,0 +1,195 @@
import os
import time
from pathlib import Path
import numpy as np
import soundfile as sf
import librosa
import torch
from functools import wraps

from flask import Flask, request, jsonify
from prometheus_flask_exporter import PrometheusMetrics
from prometheus_client import Summary, Histogram, Gauge

from workloads.lib.separator import Separator
from workloads.lib import spec_utils, nets
from workloads.lib.audio_processing.transcribe_audio import transcribe_audio_en
from workloads.lib.srt import srt_sentense_merge

# Initialize the Flask app
app = Flask(__name__)

# Integrate Prometheus metrics
metrics = PrometheusMetrics(app)
metrics.info("app_info", "EasyVideoTrans GPU Workloads Processing API", version="1.0.0")

# Custom Prometheus metrics
INFERENCE_DURATION = Summary("inference_duration_seconds", "Time spent on inference")
TRANSCRIBE_DURATION = Summary("transcribe_duration_seconds", "Time spent on transcription")
AUDIO_FILE_SIZE = Histogram("audio_file_size_bytes", "Size of input audio files",
buckets=[1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576,
2097152, 4194304, 8388608])
CURRENT_INFERENCE = Gauge("current_inference", "Number of ongoing inferences")

# Model setup from https://github.com/tsurumeso/vocal-remover/tree/develop
MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'workloads/pretrained_models')
DEFAULT_MODEL_PATH = os.path.join(MODEL_DIR, 'baseline.pth')

model = nets.CascadedNet(n_fft=2048, hop_length=1024, nout=32, nout_lstm=128)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(DEFAULT_MODEL_PATH, map_location=device))
model.to(device)
separator = Separator(model, device, batchsize=4,
cropsize=256,
postprocess=False)

# Setup input / output configurations
INPUT_DIR = "workloads/static/outputs"
OUTPUT_DIR = "workloads/static/outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)


def load_spectrogram(file_path):
X, sample_rate = librosa.load(
file_path, sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast'
)

if X.ndim == 1:
# mono to stereo
X = np.asarray([X, X])

x_spec = spec_utils.wave_to_spectrogram(X, hop_length=1024, n_fft=2048)
return x_spec, sample_rate


@app.route("/")
def index():
"""
Health check endpoint.
"""
return jsonify({"message": "Speech Separation API is running."}), 200


def require_filename_points_to_existing_file(func):
@wraps(func)
def decorated_func(*args, **kwargs):

if not request.is_json:
return jsonify({"message": "Missing JSON in request"}), 400

data = request.get_json()
if not data or "file_name" not in data:
return jsonify({"error": "Invalid request. Please provide 'file_name' in the JSON payload."}), 400

# Get the file path from the payload
file_name = data["file_name"]
file_path = os.path.join(INPUT_DIR, file_name)

if not os.path.exists(file_path):
return jsonify({"error": f"File not found: {file_path}"}), 404

return func(file_path, *args, **kwargs)

return decorated_func


def require_output_filenames(func):
@wraps(func)
def decorated_func(file_path, *args, **kwargs):
data = request.get_json()

if "output_filenames" not in data:
return jsonify({"error": "Invalid request. Please provide 'output_filenames' in the JSON payload."}), 400

output_filenames = data["output_filenames"]
output_filepaths = [os.path.join(OUTPUT_DIR, name) for name in output_filenames]

return func(file_path, output_filepaths, *args, **kwargs)

return decorated_func
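
# Note on stacking these decorators: require_filename_points_to_existing_file
# is called by Flask with no positional arguments and injects file_path into
# the function it wraps, while require_output_filenames expects that
# file_path and adds the resolved output paths. When both are applied to a
# route, require_filename_points_to_existing_file must therefore be listed
# first (outermost), as in the /audio-transcribe route below.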


@app.route("/audio-sep", methods=["POST"])
@require_filename_points_to_existing_file
def audio_separation(file_path):
"""
Endpoint to perform audio separation.
Accepts an audio file and returns separated sources.
"""

file_stem_name = Path(file_path).stem

# Track the size of the input audio file
file_size = os.path.getsize(file_path)
AUDIO_FILE_SIZE.observe(file_size)

# Perform source separation
app.logger.info(f"Processing file: {file_path}")
start_time = time.time()
CURRENT_INFERENCE.inc() # Increment the gauge for ongoing inferences
try:
x_spec, sample_rate = load_spectrogram(file_path)
app.logger.info(f"Done loading sound file: {file_path}")

y_spec, v_spec = separator.separate_tta(x_spec)

background_wave_fn, voice_wave_fn = f"{file_stem_name}_bg.wav", f"{file_stem_name}_no_bg.wav"
background_wave_path, voice_wave_path = os.path.join(OUTPUT_DIR, background_wave_fn), os.path.join(
OUTPUT_DIR, voice_wave_fn)
wave = spec_utils.spectrogram_to_wave(y_spec)
sf.write(background_wave_path, wave.T, int(sample_rate))
app.logger.info(f"Done inversed stft for background, saved to: {background_wave_path}")

wave = spec_utils.spectrogram_to_wave(v_spec)
sf.write(voice_wave_path, wave.T, int(sample_rate))
app.logger.info(f"Done inversed stft for vocal, saved to: {voice_wave_path}")

duration = time.time() - start_time
INFERENCE_DURATION.observe(duration)
CURRENT_INFERENCE.dec() # Decrement the gauge

# Return the paths of the separated sources
response = {
"message": "Separation successful.",
"files": [background_wave_fn, voice_wave_fn],
"inference_duration_seconds": duration,
"input_audio_size_bytes": file_size,
}
return jsonify(response), 200
except Exception as e:
print(f"Error during separation: {e}")
CURRENT_INFERENCE.dec() # Decrement the gauge in case of failure
return jsonify({"error": "An error occurred during audio separation."}), 500


@app.route("/audio-transcribe", methods=["POST"])
@require_filename_points_to_existing_file
@require_output_filenames
def audio_transcribe(file_path, output_filepaths):
app.logger.info(f"Transcribing file: {file_path}, output paths: {output_filepaths}")

start_time = time.time()
CURRENT_INFERENCE.inc() # Increment the gauge for ongoing inferences

try:
en_srt_path, en_srt_merged_path = output_filepaths
transcribe_audio_en(app.logger, path=file_path, modelName="medium", language="en",
srtFilePathAndName=en_srt_path)
srt_sentense_merge(app.logger, en_srt_path, en_srt_merged_path)

duration = time.time() - start_time
TRANSCRIBE_DURATION.observe(duration)
CURRENT_INFERENCE.dec() # Decrement the gauge
response = {
"message": "Transcribe successful.",
"transcribe_duration_seconds": duration,
}
return jsonify(response), 200
except Exception as e:
print(f"Error during separation: {e}")
CURRENT_INFERENCE.dec() # Decrement the gauge in case of failure
return jsonify({"error": "An error occurred during audio transcribe."}), 500


if __name__ == '__main__':
app.run(host="0.0.0.0", port=8199)
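
As a quick illustration of the JSON contract, the endpoints can be exercised with Flask's built-in test client. This is a sketch: "demo.wav" and the SRT names are hypothetical and would need to exist under workloads/static/outputs, and importing inference assumes the pretrained baseline.pth model is in place.

# Hypothetical walkthrough using Flask's test client; "demo.wav" and the
# SRT names are placeholders that must exist under workloads/static/outputs.
from inference import app

client = app.test_client()

# Background/vocal separation; responds with the two generated file names.
sep = client.post("/audio-sep", json={"file_name": "demo.wav"})
print(sep.status_code, sep.get_json())  # files: ["demo_bg.wav", "demo_no_bg.wav"]

# Transcription of the separated vocal track into two SRT outputs.
srt = client.post("/audio-transcribe",
                  json={"file_name": "demo_no_bg.wav",
                        "output_filenames": ["demo_en.srt", "demo_en_merged.srt"]})
print(srt.status_code, srt.get_json())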