From e349b962999302c5717890843e2211769e30257b Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Tue, 24 Sep 2024 16:16:02 -0400 Subject: [PATCH] Refactor build script for Linux and MacOS in CI workflow --- examples/CMakeLists.txt | 4 ++++ examples/audio_capture.cpp | 2 ++ examples/audio_capture.h | 1 - examples/realtime_transcription.cpp | 4 +++- scripts/build-windows.ps1 | 6 ++++- .../transcription/include/transcription.h | 3 +++ .../include/whisper-processing.h | 4 +++- .../transcription/src/transcription.cpp | 23 ++++++++++++++++--- .../transcription/src/whisper-processing.cpp | 8 +++---- 9 files changed, 43 insertions(+), 12 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0100938..daa9260 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,3 +3,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/BuildSDL.cmake) add_executable(RealtimeTranscription realtime_transcription.cpp audio_capture.cpp) target_link_libraries(RealtimeTranscription PRIVATE SDL2 Core Transcription) + +# add target "examples" to the global target list +add_custom_target(examples) +add_dependencies(examples RealtimeTranscription) diff --git a/examples/audio_capture.cpp b/examples/audio_capture.cpp index 0d18470..dfe99ee 100644 --- a/examples/audio_capture.cpp +++ b/examples/audio_capture.cpp @@ -12,6 +12,8 @@ AudioCapture::~AudioCapture() { } bool AudioCapture::initialize(int device_index, int requested_sample_rate) { + SDL_InitSubSystem(SDL_INIT_AUDIO); + SDL_AudioSpec desired_spec, obtained_spec; SDL_zero(desired_spec); diff --git a/examples/audio_capture.h b/examples/audio_capture.h index fd67cd3..621edde 100644 --- a/examples/audio_capture.h +++ b/examples/audio_capture.h @@ -1,6 +1,5 @@ #pragma once -#define SDL_MAIN_HANDLED #include #include diff --git a/examples/realtime_transcription.cpp b/examples/realtime_transcription.cpp index 9cee776..7f6c96e 100644 --- a/examples/realtime_transcription.cpp +++ b/examples/realtime_transcription.cpp @@ -1,11 +1,13 @@ #include + +#define SDL_MAIN_HANDLED #include "audio_capture.h" #include int main() { - SDL_SetMainReady(); + // SDL_SetMainReady(); // Initialize the library locaal::Transcription tt; diff --git a/scripts/build-windows.ps1 b/scripts/build-windows.ps1 index 4984202..49fba7c 100644 --- a/scripts/build-windows.ps1 +++ b/scripts/build-windows.ps1 @@ -1,6 +1,7 @@ param( [switch]$Verbose, - [switch]$Clean + [switch]$Clean, + [switch]$Examples ) $verboseFlag = "" @@ -33,5 +34,8 @@ Invoke-Expression $configureCommand # Build step $buildCommand = "cmake --build $buildDir --config Release $verboseBuildFlag" +if ($Examples) { + $buildCommand += " --target examples" +} Write-Host "Executing build command: $buildCommand" Invoke-Expression $buildCommand diff --git a/src/modules/transcription/include/transcription.h b/src/modules/transcription/include/transcription.h index 4daa89a..e7e5632 100644 --- a/src/modules/transcription/include/transcription.h +++ b/src/modules/transcription/include/transcription.h @@ -4,6 +4,8 @@ #include #include +struct transcription_context; + namespace locaal { struct TranscriptionResult { @@ -41,6 +43,7 @@ class Transcription { std::function transcriptionCallback_; // Add any other necessary private members + transcription_context *gf; }; } // namespace locaal diff --git a/src/modules/transcription/include/whisper-processing.h b/src/modules/transcription/include/whisper-processing.h index ef645d7..0208f38 100644 --- a/src/modules/transcription/include/whisper-processing.h +++ b/src/modules/transcription/include/whisper-processing.h @@ -29,7 +29,9 @@ struct DetectionResultWithText { std::string language; }; -void whisper_loop(void *data); +struct transcription_context; + +void whisper_loop(struct transcription_context *gf); struct whisper_context *init_whisper_context(const std::string &model_path, struct transcription_context *gf); void run_inference_and_callbacks(transcription_context *gf, uint64_t start_offset_ms, diff --git a/src/modules/transcription/src/transcription.cpp b/src/modules/transcription/src/transcription.cpp index 92652ee..fc6022e 100644 --- a/src/modules/transcription/src/transcription.cpp +++ b/src/modules/transcription/src/transcription.cpp @@ -1,17 +1,34 @@ #include "transcription.h" #include "logger.h" +#include "transcription-context.h" +#include "whisper-utils.h" + #include +void set_text_callback(struct transcription_context *gf, const DetectionResultWithText &str) +{ + Logger::log(Logger::Level::INFO, "Transcription: %s", str.text.c_str()); +} + +void clear_current_caption(transcription_context *gf_){}; + +// Callback sent when the VAD finds an audio chunk. Sample rate = WHISPER_SAMPLE_RATE, channels = 1 +// The audio chunk is in 32-bit float format +void audio_chunk_callback(struct transcription_context *gf, const std::vector pcm32f_data, + int vad_state, const DetectionResultWithText &result){}; + namespace locaal { Transcription::Transcription() { // Constructor implementation + gf = new transcription_context(); } Transcription::~Transcription() { // Destructor implementation + delete gf; } void Transcription::setTranscriptionParams(const std::string &language) @@ -41,15 +58,15 @@ void Transcription::setTranscriptionCallback( void Transcription::startTranscription() { Logger::log(Logger::Level::INFO, "Starting transcription..."); - // Implement the logic to start the transcription process - // This might involve starting a new thread, initializing audio capture, etc. + // start the transcription thread + start_whisper_thread_with_path(this->gf, "en", "silero_vad_model_file"); } void Transcription::stopTranscription() { Logger::log(Logger::Level::INFO, "Stopping transcription..."); // Implement the logic to stop the transcription process - // This might involve stopping the transcription thread, cleaning up resources, etc. + shutdown_whisper_thread(this->gf); } void Transcription::processAudio(const std::vector &audioData) diff --git a/src/modules/transcription/src/whisper-processing.cpp b/src/modules/transcription/src/whisper-processing.cpp index 9db0e6d..e53b490 100644 --- a/src/modules/transcription/src/whisper-processing.cpp +++ b/src/modules/transcription/src/whisper-processing.cpp @@ -330,15 +330,13 @@ void run_inference_and_callbacks(transcription_context *gf, uint64_t start_offse } } -void whisper_loop(void *data) +void whisper_loop(transcription_context *gf) { - if (data == nullptr) { - Logger::log(Logger::Level::ERROR_LOG, "whisper_loop: data is null"); + if (gf == nullptr) { + Logger::log(Logger::Level::ERROR_LOG, "whisper_loop: context is null"); return; } - struct transcription_context *gf = static_cast(data); - Logger::log(gf->log_level, "Starting whisper thread"); vad_state current_vad_state = {false, now_ms(), 0, 0};