Separate GPU-accelerated steps into the GPU workload image.
HanFa committed Feb 11, 2025
1 parent 79acf1d commit 8494611
Showing 32 changed files with 521 additions and 578 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/docker-build.yaml
@@ -15,9 +15,17 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build
- name: Build EasyVideoTrans service
uses: docker/build-push-action@v2
with:
context: .
push: false
tags: hanfa/pytvzhen-web:${{github.event.pull_request.number}}
tags: hanfa/easyvideotrans:${{github.event.pull_request.number}}

- name: Build EasyVideoTrans workloads
uses: docker/build-push-action@v2
with:
context: .
file: Dockerfile-gpu-workload
push: false
tags: hanfa/easyvideotrans-workloads:${{github.event.pull_request.number}}
4 changes: 2 additions & 2 deletions .github/workflows/docker-release.yaml
@@ -1,4 +1,4 @@
name: Pytvzhen-web Docker Image Release
name: EasyVideoTrans Service Docker Image Release

on:
workflow_run:
@@ -29,4 +29,4 @@ jobs:
with:
context: .
push: true
tags: hanfa/pytvzhen-web:latest
tags: hanfa/easyvideotrans:latest
32 changes: 32 additions & 0 deletions .github/workflows/docker-workload-release.yaml
@@ -0,0 +1,32 @@
name: EasyVideoTrans Workloads Docker Image Release

on:
workflow_run:
workflows: [ "Pytvzhen-web application test" ]
branches: [ "master" ]
types:
- completed

jobs:
build:
runs-on: self-hosted

steps:
- name: Check out code
uses: actions/checkout@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v2
with:
context: .
push: true
tags: hanfa/easyvideotrans-workloads:latest
2 changes: 2 additions & 0 deletions .gitignore
@@ -27,3 +27,5 @@ output/
!celery_results/*

.DS_Store

.pytest_cache
2 changes: 1 addition & 1 deletion Dockerfile
@@ -15,7 +15,7 @@ COPY requirements.txt .

# Install dependencies
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install --default-timeout=200 -r requirements.txt


FROM base AS final
24 changes: 24 additions & 0 deletions Dockerfile-gpu-workload
@@ -0,0 +1,24 @@
FROM python:3.9-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

RUN apt-get update && apt-get install -y \
ffmpeg \
git \
&& rm -rf /var/lib/apt/lists/*


COPY workloads/requirements.txt /app/

RUN pip install --no-cache-dir -r requirements.txt

COPY workloads /app/workloads/
COPY src /app/src/
COPY inference.py /app

EXPOSE 8199

CMD ["python", "inference.py"]
25 changes: 13 additions & 12 deletions app.py
@@ -4,11 +4,10 @@
import zipfile
import shutil
import uuid
from src.service.audio_processing.audio_remove import audio_remove
from src.service.audio_processing.transcribe_audio import transcribe_audio_en
from src.service.audio_processing.voice_connect import connect_voice
from src.service.translation import get_translator, srt_sentense_merge
from src.service.video_synthesis.voice_connect import connect_voice
from src.service.translation import get_translator
from src.service.tts import get_tts_client
from src.workload_client import EasyVideoTransWorkloadClient
from src.task_manager.celery_tasks.tasks import video_preview_task
from src.task_manager.celery_tasks.celery_utils import get_queue_length
from werkzeug.utils import secure_filename
Expand All @@ -19,16 +18,23 @@
from prometheus_flask_exporter import PrometheusMetrics

app = Flask(__name__, template_folder="./appendix/templates", static_folder="./appendix/static")
app.config.from_file("./configs/pytvzhen.json", load=json.load)
app.config.from_file("./configs/easyvideotrans.json", load=json.load)
metrics = PrometheusMetrics(app)
metrics.info('pytvzhen_web', 'Pytvzhen backend API', version='1.0.0')

PYTVZHEN_STAGE = 'PYTVZHEN_STAGE'
pytvzhen_api_request_counter = metrics.counter(
'pytvzhen_api_request_counter', 'Request count by request paths',
labels={'base_url': lambda: url_rule_to_base(request.url_rule), 'stage': lambda: pytvzhen_stage(),
'method': lambda: request.method, 'status': lambda r: r.status_code}
)

# Setup workloads client to submit any GPU workloads to EasyVideoTrans compute backend
gpu_workload = EasyVideoTransWorkloadClient(
audio_separation_endpoint=app.config['VOICE_BACKGROUND_SEPARATION_ENDPOINT'],
audio_transcribe_endpoint=app.config['AUDIO_TRANSCRIBE_ENDPOINT'],
)


def pytvzhen_stage():
return os.environ[PYTVZHEN_STAGE] if PYTVZHEN_STAGE in os.environ else 'default'
@@ -283,9 +289,7 @@ def remove_audio_bg(video_id):
f'not found at {output_path}, please extract it first')}), 404

try:
baseline_path = app.config['REMOVE_BACKGROUND_MUSIC_BASELINE_MODEL_PATH']
audio_remove(audio_path, audio_no_bg_path, audio_bg_fn_path, baseline_path,
app.config['REMOVE_BACKGROUND_MUSIC_TORCH_DEVICE'])
audio_bg_fn_path, audio_no_bg_fn = gpu_workload.separate_audio(audio_fn)
return jsonify({"message": log_info_return_str(
f"Remove remove background music for {audio_fn} as {audio_no_bg_fn} and {audio_bg_fn_path} successfully."),
"video_id": video_id}), 200
@@ -351,10 +355,7 @@ def transcribe(video_id):
f'not found at {audio_no_bg_path}, please extract it first')}), 404

try:
transcribe_audio_en(app.logger, path=audio_no_bg_path, modelName=transcribe_model, language="en",
srtFilePathAndName=en_srt_path)
srt_sentense_merge(app.logger, en_srt_path, en_srt_merged_path)

gpu_workload.transcribe_audio(audio_no_bg_path, [en_srt_path, en_srt_merged_path])
return jsonify({"message": log_info_return_str(
f"Transcribed SRT from {audio_no_bg_fn} as {en_srt_fn} and {en_srt_merged_fn} successfully."),
"video_id": video_id}), 200
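The diff does not include src/workload_client.py itself. Inferred from the /audio-sep and /audio-transcribe handlers in inference.py below, a minimal sketch of the client might look like the following; the class and method names come from app.py above, but everything else is an assumption, not the committed implementation.

# Hypothetical sketch of src/workload_client.py, inferred from the
# /audio-sep and /audio-transcribe routes in inference.py; not the
# committed implementation.
import requests


class EasyVideoTransWorkloadClient:
    def __init__(self, audio_separation_endpoint, audio_transcribe_endpoint):
        self.audio_separation_endpoint = audio_separation_endpoint
        self.audio_transcribe_endpoint = audio_transcribe_endpoint

    def separate_audio(self, file_name):
        # POST {"file_name": ...}; the workload returns the background and
        # vocal file names in "files" (see the /audio-sep handler below).
        resp = requests.post(self.audio_separation_endpoint,
                             json={"file_name": file_name})
        resp.raise_for_status()
        background_fn, voice_fn = resp.json()["files"]
        return background_fn, voice_fn

    def transcribe_audio(self, file_name, output_filenames):
        # POST the input file name plus the two SRT output names,
        # matching the /audio-transcribe handler's JSON contract.
        resp = requests.post(self.audio_transcribe_endpoint,
                             json={"file_name": file_name,
                                   "output_filenames": output_filenames})
        resp.raise_for_status()
        return resp.json()
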
6 changes: 6 additions & 0 deletions configs/easyvideotrans.json
@@ -0,0 +1,6 @@
{
"OUTPUT_PATH": "./output",
"VIDEO_MAX_DURATION": 3610,
"VOICE_BACKGROUND_SEPARATION_ENDPOINT": "http://localhost:8199/audio-sep",
"AUDIO_TRANSCRIBE_ENDPOINT": "http://localhost:8199/audio-transcribe"
}
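
Both endpoints target the GPU workload service defined in inference.py below, which listens on port 8199; the web service submits GPU work to it over HTTP instead of running the models in-process.
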
6 changes: 0 additions & 6 deletions configs/pytvzhen.json

This file was deleted.

195 changes: 195 additions & 0 deletions inference.py
@@ -0,0 +1,195 @@
import os
import time
from pathlib import Path
import numpy as np
import soundfile as sf
import librosa
import torch
from functools import wraps

from flask import Flask, request, jsonify
from prometheus_flask_exporter import PrometheusMetrics
from prometheus_client import Summary, Histogram, Gauge

from workloads.lib.separator import Separator
from workloads.lib import spec_utils, nets
from workloads.lib.audio_processing.transcribe_audio import transcribe_audio_en
from workloads.lib.srt import srt_sentense_merge

# Initialize the Flask app
app = Flask(__name__)

# Integrate Prometheus metrics
metrics = PrometheusMetrics(app)
metrics.info("app_info", "EasyVideoTrans GPU Workloads Processing API", version="1.0.0")

# Custom Prometheus metrics
INFERENCE_DURATION = Summary("inference_duration_seconds", "Time spent on inference")
TRANSCRIBE_DURATION = Summary("transcribe_duration_seconds", "Time spent on transcription")
AUDIO_FILE_SIZE = Histogram("audio_file_size_bytes", "Size of input audio files",
buckets=[1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576,
2097152, 4194304, 8388608])
CURRENT_INFERENCE = Gauge("current_inference", "Number of ongoing inferences")

# Model setup from https://github.com/tsurumeso/vocal-remover/tree/develop
MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'workloads/pretrained_models')
DEFAULT_MODEL_PATH = os.path.join(MODEL_DIR, 'baseline.pth')

model = nets.CascadedNet(n_fft=2048, hop_length=1024, nout=32, nout_lstm=128)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(DEFAULT_MODEL_PATH, map_location=device))
model.to(device)
separator = Separator(model, device, batchsize=4,
cropsize=256,
postprocess=False)

# Setup input / output configurations
INPUT_DIR = "workloads/static/outputs"
OUTPUT_DIR = "workloads/static/outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)


def load_spectrogram(file_path):
X, sample_rate = librosa.load(
file_path, sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast'
)

if X.ndim == 1:
# mono to stereo
X = np.asarray([X, X])

x_spec = spec_utils.wave_to_spectrogram(X, hop_length=1024, n_fft=2048)
return x_spec, sample_rate


@app.route("/")
def index():
"""
Health check endpoint.
"""
return jsonify({"message": "Speech Separation API is running."}), 200


def require_filename_points_to_existing_file(func):
@wraps(func)
def decorated_func(*args, **kwargs):

if not request.is_json:
return jsonify({"message": "Missing JSON in request"}), 400

data = request.get_json()
if not data or "file_name" not in data:
return jsonify({"error": "Invalid request. Please provide 'file_name' in the JSON payload."}), 400

# Get the file path from the payload
file_name = data["file_name"]
file_path = os.path.join(INPUT_DIR, file_name)

if not os.path.exists(file_path):
return jsonify({"error": f"File not found: {file_path}"}), 404

return func(file_path, *args, **kwargs)

return decorated_func


def require_output_filenames(func):
@wraps(func)
def decorated_func(file_path, *args, **kwargs):
data = request.get_json()

if "output_filenames" not in data:
return jsonify({"error": "Invalid request. Please provide 'output_filenames' in the JSON payload."}), 400

output_filenames = data["output_filenames"]
output_filepaths = [os.path.join(OUTPUT_DIR, name) for name in output_filenames]

return func(file_path, output_filepaths, *args, **kwargs)

return decorated_func
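
# Note on stacking these decorators: require_filename_points_to_existing_file
# is called by Flask with no positional arguments and injects file_path into
# the function it wraps, while require_output_filenames expects that
# file_path and adds the resolved output paths. When both are applied to a
# route, require_filename_points_to_existing_file must therefore be listed
# first (outermost), as in the /audio-transcribe route below.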


@app.route("/audio-sep", methods=["POST"])
@require_filename_points_to_existing_file
def audio_separation(file_path):
"""
Endpoint to perform audio separation.
Accepts an audio file and returns separated sources.
"""

file_stem_name = Path(file_path).stem

# Track the size of the input audio file
file_size = os.path.getsize(file_path)
AUDIO_FILE_SIZE.observe(file_size)

# Perform source separation
app.logger.info(f"Processing file: {file_path}")
start_time = time.time()
CURRENT_INFERENCE.inc() # Increment the gauge for ongoing inferences
try:
x_spec, sample_rate = load_spectrogram(file_path)
app.logger.info(f"Done loading sound file: {file_path}")

y_spec, v_spec = separator.separate_tta(x_spec)

background_wave_fn, voice_wave_fn = f"{file_stem_name}_bg.wav", f"{file_stem_name}_no_bg.wav"
background_wave_path, voice_wave_path = os.path.join(OUTPUT_DIR, background_wave_fn), os.path.join(
OUTPUT_DIR, voice_wave_fn)
wave = spec_utils.spectrogram_to_wave(y_spec)
sf.write(background_wave_path, wave.T, int(sample_rate))
app.logger.info(f"Done inversed stft for background, saved to: {background_wave_path}")

wave = spec_utils.spectrogram_to_wave(v_spec)
sf.write(voice_wave_path, wave.T, int(sample_rate))
app.logger.info(f"Done inversed stft for vocal, saved to: {voice_wave_path}")

duration = time.time() - start_time
INFERENCE_DURATION.observe(duration)
CURRENT_INFERENCE.dec() # Decrement the gauge

# Return the paths of the separated sources
response = {
"message": "Separation successful.",
"files": [background_wave_fn, voice_wave_fn],
"inference_duration_seconds": duration,
"input_audio_size_bytes": file_size,
}
return jsonify(response), 200
except Exception as e:
print(f"Error during separation: {e}")
CURRENT_INFERENCE.dec() # Decrement the gauge in case of failure
return jsonify({"error": "An error occurred during audio separation."}), 500


@app.route("/audio-transcribe", methods=["POST"])
@require_filename_points_to_existing_file
@require_output_filenames
def audio_transcribe(file_path, output_filepaths):
app.logger.info(f"Transcribing file: {file_path}, output paths: {output_filepaths}")

start_time = time.time()
CURRENT_INFERENCE.inc() # Increment the gauge for ongoing inferences

try:
en_srt_path, en_srt_merged_path = output_filepaths
transcribe_audio_en(app.logger, path=file_path, modelName="medium", language="en",
srtFilePathAndName=en_srt_path)
srt_sentense_merge(app.logger, en_srt_path, en_srt_merged_path)

duration = time.time() - start_time
TRANSCRIBE_DURATION.observe(duration)
CURRENT_INFERENCE.dec() # Decrement the gauge
response = {
"message": "Transcribe successful.",
"transcribe_duration_seconds": duration,
}
return jsonify(response), 200
except Exception as e:
print(f"Error during separation: {e}")
CURRENT_INFERENCE.dec() # Decrement the gauge in case of failure
return jsonify({"error": "An error occurred during audio transcribe."}), 500


if __name__ == '__main__':
app.run(host="0.0.0.0", port=8199)
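
As a quick illustration of the JSON contract, the endpoints can be exercised with Flask's built-in test client. This is a sketch: "demo.wav" and the SRT names are hypothetical and would need to exist under workloads/static/outputs, and importing inference assumes the pretrained baseline.pth model is in place.

# Hypothetical walkthrough using Flask's test client; "demo.wav" and the
# SRT names are placeholders that must exist under workloads/static/outputs.
from inference import app

client = app.test_client()

# Background/vocal separation; responds with the two generated file names.
sep = client.post("/audio-sep", json={"file_name": "demo.wav"})
print(sep.status_code, sep.get_json())  # files: ["demo_bg.wav", "demo_no_bg.wav"]

# Transcription of the separated vocal track into two SRT outputs.
srt = client.post("/audio-transcribe",
                  json={"file_name": "demo_no_bg.wav",
                        "output_filenames": ["demo_en.srt", "demo_en_merged.srt"]})
print(srt.status_code, srt.get_json())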