-
Notifications
You must be signed in to change notification settings - Fork 0
/
encoder-cli.cpp
76 lines (56 loc) · 1.98 KB
/
encoder-cli.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include "lib/common/common.h"
#include <iostream>
#include <vector>
#include <algorithm>
#include <cctype>
#include <sstream>
#include "lib/encoder.h"
/// Command-line entry point: loads an encoder model, runs it over one WAV
/// file and prints the outcome as a single-line JSON object on stdout.
///
/// Usage: <program> <model_path> <input_wav_file> [optional_arg]
///
/// Output shape: {"return_code": <0|1>, "embedding": [<values>...]}
/// The "embedding" array is only populated when the encoder call returns 0.
///
/// @param argc Number of command-line arguments; at least 3 are required.
/// @param argv argv[1] is the model path, argv[2] the input WAV file. The
///             optional argv[3] is compared case-insensitively with "true".
/// @return 1 on a usage error, a model-load failure or a WAV-read failure;
///         otherwise the value returned by encoder_full_parallel().
int main(int argc, char** argv) {
    // Make the C stdout stream unbuffered.
    setbuf(stdout, nullptr);

    if (argc < 3) {
        // argv[0] may legitimately be null when argc == 0, so guard it.
        const char* program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "encoder-cli";
        std::cerr << "Usage: " << program << " <model_path> <input_wav_file> [optional_arg]" << std::endl;
        return 1;
    }

    const char* model_path = argv[1];
    const char* fname_inp = argv[2];

    // Optional third argument: true only if it spells "true" (any letter case).
    bool optional_arg = false;
    if (argc > 3) {
        std::string arg(argv[3]);
        // Go through unsigned char: passing a negative char to tolower is
        // undefined behaviour.
        std::transform(arg.begin(), arg.end(), arg.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        optional_arg = (arg == "true");
    }
    // The flag is parsed but not consumed anywhere in this function yet.
    (void)optional_arg;

    // Initialize the encoder context from the model file.
    // NOTE(review): ctx is never released in this function; if the encoder
    // library provides a matching free/destroy call, it should be invoked
    // before each return below — confirm against lib/encoder.h.
    encoder_context* ctx = encoder_init_from_file(model_path);
    if (ctx == nullptr) {
        std::cerr << "Failed to initialize whisper context from file: " << model_path << std::endl;
        return 1;
    }

    std::vector<float> pcmf32;                // mono-channel F32 PCM
    std::vector<std::vector<float>> pcmf32s;  // stereo-channel F32 PCM

    if (!::read_wav(fname_inp, pcmf32, pcmf32s, false)) {
        std::cerr << "error: failed to read WAV file '" << fname_inp << "'\n";
        // Without audio there is nothing to encode; bail out instead of
        // handing an empty (possibly null) buffer to the encoder.
        return 1;
    }

    encoder_full_params eparams = encoder_full_default_params();

    int res = encoder_full_parallel(
        ctx,
        eparams,
        pcmf32.data(),
        pcmf32.size(),
        1);

    std::ostringstream json_output;
    json_output << "{";
    // NOTE(review): "return_code" is 1 when res == 0 and 0 otherwise, i.e. the
    // inverse of the process exit status returned below. Preserved as-is
    // because existing consumers may depend on it — confirm this is intended.
    json_output << "\"return_code\": " << (res ? 0 : 1) << ", ";
    json_output << "\"embedding\": [";
    if (res == 0) {
        const auto& embedding = ctx->state->encoder_embedding;
        for (size_t i = 0; i < embedding.size(); ++i) {
            // Emit the separator before every element except the first.
            if (i != 0) {
                json_output << ", ";
            }
            json_output << embedding[i];
        }
    }
    json_output << "]}";

    std::cout << json_output.str() << std::endl;
    return res;
}