From 3e99276a2739defcff77695dee2c7dd3ac8da4bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bjarke=20Viks=C3=B8e?= Date: Wed, 3 Jul 2024 12:43:53 +0200 Subject: [PATCH 1/2] Incorrect timestamps Fixes #2271 - Adds consecutive timestamps after end of last segment as the new starting timestamp - Adds these timestamps to output when "print-special" is enabled - Fixes fflush usage in live reporting I was not able to test this with the special "token_timestamps" option. --- src/whisper.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index dc06ee272ed..0610a84cac7 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -6240,11 +6240,15 @@ int whisper_full_with_state( } } text = ""; + t0 = t1; while (i < (int) tokens_cur.size() && tokens_cur[i].id > whisper_token_beg(ctx)) { + if (params.print_special) { + text += whisper_token_to_str(ctx, tokens_cur[i].id); + } + t0 = seek + 2 * (tokens_cur[i].tid - whisper_token_beg(ctx)); i++; } i--; - t0 = t1; i0 = i + 1; speaker_turn_next = false; } @@ -6261,8 +6265,8 @@ int whisper_full_with_state( printf("[%s --> %s] %s\n", to_timestamp(tt0).c_str(), to_timestamp(tt1).c_str(), text.c_str()); } else { printf("%s", text.c_str()); - fflush(stdout); } + fflush(stdout); } result_all.push_back({ tt0, tt1, text, {} , speaker_turn_next }); From 6c05cf7b0dd83cc90b9f4f86c80f4c9d22e3c5c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bjarke=20Viks=C3=B8e?= Date: Thu, 4 Jul 2024 08:37:28 +0200 Subject: [PATCH 2/2] Skip initial timestamp --- src/whisper.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 0610a84cac7..b28bc98279b 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -6241,14 +6241,13 @@ int whisper_full_with_state( } text = ""; t0 = t1; - while (i < (int) tokens_cur.size() && tokens_cur[i].id > whisper_token_beg(ctx)) { + while (i + 1 < (int) tokens_cur.size() && tokens_cur[i + 1].id > whisper_token_beg(ctx)) { + i++; if 
(params.print_special) { text += whisper_token_to_str(ctx, tokens_cur[i].id); } t0 = seek + 2 * (tokens_cur[i].tid - whisper_token_beg(ctx)); - i++; } - i--; i0 = i + 1; speaker_turn_next = false; }