Skip to content

Commit

Permalink
feat(audio): add Resample shard and AudioToBytes conversion
Browse files Browse the repository at this point in the history
- Implement Resample shard to resample audio data with configurable output sample rate
- Add AudioToBytes shard to convert audio buffers to byte arrays
- Update audio test to include resampling and writing resampled audio to file
  • Loading branch information
sinkingsugar committed Jan 20, 2025
1 parent c007725 commit f9625a1
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 1 deletion.
112 changes: 112 additions & 0 deletions shards/modules/audio/audio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,117 @@ struct WriteFile {
}
};

// Shard that converts a stream of Audio chunks to a different sample rate using
// miniaudio's linear resampler (ma_resample_algorithm_linear, f32 samples).
//
// The resampler is created lazily on the first activation, from the first chunk's
// sample rate and channel count and from the value OutRate holds at that moment.
// Every later chunk must use the same input sample rate and channel count; OutRate
// is not re-read until the shard has been cleaned up and activated again.
struct Resample {
  ma_resampler _resampler;
  bool _initialized{false};

  static SHOptionalString help() { return SHCCSTR("This shard resamples audio data."); }

  static SHTypesInfo inputTypes() { return CoreInfo::AudioType; }
  static SHOptionalString inputHelp() {
    return SHCCSTR("Accepts audio data as an Audio chunk, containing the sample rate, number of samples, "
                   "number of channels, and the audio samples.");
  }
  static SHTypesInfo outputTypes() { return CoreInfo::AudioType; }
  static SHOptionalString outputHelp() { return SHCCSTR("Outputs the resampled audio data."); }

  static const SHTable *properties() { return &experimental.payload.tableValue; }

  PARAM_PARAMVAR(_outRate, "OutRate", "The output sample rate.", {CoreInfo::IntType, CoreInfo::IntVarType});

  PARAM_IMPL(PARAM_IMPL_FOR(_outRate));

  PARAM_REQUIRED_VARIABLES()
  SHTypeInfo compose(SHInstanceData &data) {
    PARAM_COMPOSE_REQUIRED_VARIABLES(data);
    return outputTypes().elements[0];
  }

  void warmup(SHContext *context) {
    PARAM_WARMUP(context);
    // Drop input frames carried over from a previous run so they do not leak into this one.
    _leftoverSamples.clear();
  }

  void cleanup(SHContext *context) {
    if (_initialized) {
      ma_resampler_uninit(&_resampler, nullptr);
      _initialized = false;
    }

    PARAM_CLEANUP(context);
  }

  std::vector<float> _buffer;          // resampled output; the returned Audio chunk points into it
  std::vector<float> _leftoverSamples; // input samples the resampler did not consume last time
  std::vector<float> _combinedInput;   // leftover samples followed by the current chunk
  ma_uint32 _inSampleRate{0};
  ma_uint32 _outSampleRate{0};
  ma_uint32 _channels{0};

  // Resamples one Audio chunk.
  //
  // Returns an Audio value whose samples pointer refers to this shard's internal
  // buffer, which is overwritten by the next activation.
  //
  // Throws ActivationError when OutRate is not a positive 32-bit rate, when the
  // resampler cannot be created or fails, when the input sample rate or channel
  // count differs from the one the resampler was created with, or when the
  // resampled frame count does not fit the Audio frame-count field.
  SHVar activate(SHContext *context, const SHVar &input) {
    const auto &audio = input.payload.audioValue;

    if (!_initialized) {
      // OutRate is a 64-bit integer while miniaudio takes a 32-bit unsigned rate:
      // reject values that would otherwise be silently reinterpreted by the narrowing.
      const auto requestedRate = _outRate.get().payload.intValue;
      if (requestedRate <= 0 || requestedRate > 0xFFFFFFFFLL) {
        throw ActivationError("OutRate must be a positive sample rate");
      }
      const auto outRate = static_cast<ma_uint32>(requestedRate);

      ma_resampler_config config =
          ma_resampler_config_init(ma_format_f32, audio.channels, audio.sampleRate, outRate, ma_resample_algorithm_linear);
      ma_result initRes = ma_resampler_init(&config, nullptr, &_resampler);
      if (initRes != MA_SUCCESS) {
        throw ActivationError("Failed to initialize resampler");
      }
      _initialized = true;
      _inSampleRate = audio.sampleRate;
      _outSampleRate = outRate;
      _channels = audio.channels;
    }

    if (audio.sampleRate != _inSampleRate) {
      throw ActivationError("Input sample rate does not match initialized sample rate");
    }

    // The copy below sizes the input with the cached channel count, so a chunk with a
    // different channel count would be read with the wrong length (possibly past its end).
    if (audio.channels != _channels) {
      throw ActivationError("Input channel count does not match initialized channel count");
    }

    // Prepare combined input buffer with leftover samples and new input
    _combinedInput.clear();
    if (!_leftoverSamples.empty()) {
      _combinedInput.insert(_combinedInput.end(), _leftoverSamples.begin(), _leftoverSamples.end());
    }

    // Add new input samples
    const size_t inputSampleCount = static_cast<size_t>(audio.nsamples) * _channels;
    _combinedInput.insert(_combinedInput.end(), audio.samples, audio.samples + inputSampleCount);

    ma_uint64 totalFramesIn = _combinedInput.size() / _channels;
    ma_uint64 frameCountIn = totalFramesIn; // in: frames available, out: frames consumed
    ma_uint64 frameCountOut = 0;            // in: output capacity, out: frames produced

    // Get expected output frame count
    ma_result res = ma_resampler_get_expected_output_frame_count(&_resampler, frameCountIn, &frameCountOut);
    if (res != MA_SUCCESS) {
      throw ActivationError("Failed to get expected output frame count");
    }

    _buffer.resize(frameCountOut * _channels);
    res = ma_resampler_process_pcm_frames(&_resampler, _combinedInput.data(), &frameCountIn, _buffer.data(), &frameCountOut);

    if (res != MA_SUCCESS) {
      SHLOG_ERROR("Failed to resample audio: {} {}", res, frameCountIn);
      throw ActivationError("Failed to resample audio");
    }

    // Store unconsumed samples for next iteration
    if (frameCountIn < totalFramesIn) {
      size_t unconsumedSamples = (totalFramesIn - frameCountIn) * _channels;
      _leftoverSamples.assign(_combinedInput.end() - unconsumedSamples, _combinedInput.end());
    } else {
      _leftoverSamples.clear();
    }

    // Start from the input so every field not set below is carried over unchanged.
    SHVar output = input;

    // frameCountOut is 64-bit; refuse to hand out a chunk whose frame count would be
    // silently truncated if the Audio frame-count field is narrower than that.
    using FrameCountField = decltype(output.payload.audioValue.nsamples);
    const auto producedFrames = static_cast<FrameCountField>(frameCountOut);
    if (static_cast<ma_uint64>(producedFrames) != frameCountOut) {
      throw ActivationError("Resampled audio chunk has too many frames");
    }

    output.payload.audioValue.nsamples = producedFrames;
    output.payload.audioValue.samples = _buffer.data();
    output.payload.audioValue.sampleRate = _outSampleRate;

    return output;
  }
};

struct Engine {
static inline shards::logging::Logger Logger = shards::logging::getOrCreate("audio");
static constexpr uint32_t EngineCC = 'snde';
Expand Down Expand Up @@ -1814,6 +1925,7 @@ SHARDS_REGISTER_FN(audio) {
REGISTER_SHARD("Audio.ReadFile", shards::Audio::ReadFile);
REGISTER_SHARD("Audio.ReadFileBytes", shards::Audio::ReadFileBytes);
REGISTER_SHARD("Audio.WriteFile", shards::Audio::WriteFile);
REGISTER_SHARD("Audio.Resample", shards::Audio::Resample);

REGISTER_SHARD("Audio.Engine", shards::Audio::Engine);

Expand Down
22 changes: 22 additions & 0 deletions shards/modules/core/casting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,28 @@ struct ImageToBytes {
}
};

// Shard that exposes the raw float samples of an Audio value as a Bytes value.
//
// The bytes are the in-memory representation of the samples array
// (nsamples * channels floats), copied into a buffer owned by this shard.
struct AudioToBytes {
  static SHTypesInfo inputTypes() { return CoreInfo::AudioType; }
  static SHOptionalString inputHelp() { return SHCCSTR("Accepts an audio buffer as input."); }

  static SHTypesInfo outputTypes() { return CoreInfo::BytesType; }
  static SHOptionalString outputHelp() { return SHCCSTR("The input audio buffer represented as a byte array."); }

  static SHOptionalString help() { return SHCCSTR("Converts an audio buffer into a byte array."); }

  // Backing storage for the returned Bytes value; overwritten on every activation.
  std::vector<uint8_t> _output;

  // Size in bytes of the samples array described by `audio`.
  size_t audioDeriveDataLength(const SHAudio &audio) {
    return static_cast<size_t>(audio.nsamples) * audio.channels * sizeof(float);
  }

  // Copies the input samples into `_output` and returns a Bytes value referring to it.
  // A missing samples pointer or a zero-length buffer yields an empty byte array.
  SHVar activate(SHContext *context, const SHVar &input) {
    auto &audio = input.payload.audioValue;
    const size_t audioDataLength = audioDeriveDataLength(audio);

    if (audio.samples == nullptr || audioDataLength == 0) {
      _output.clear();
    } else {
      // The output buffer must actually hold the sample bytes: returning an unfilled
      // vector's data pointer with a non-zero length would hand out invalid memory.
      const auto *sampleBytes = reinterpret_cast<const uint8_t *>(audio.samples);
      _output.assign(sampleBytes, sampleBytes + audioDataLength);
    }

    return Var(_output.data(), static_cast<uint32_t>(_output.size()));
  }
};

SHARDS_REGISTER_FN(casting) {
REGISTER_SHARD("ToInt", ToNumber<SHType::Int>);
REGISTER_SHARD("ToInt2", ToNumber<SHType::Int2>);
Expand Down
4 changes: 3 additions & 1 deletion shards/tests/audio.shs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
@mesh(main)

@wire(play-files {
Audio.ReadFile("./data/Ode_to_Joy.ogg" From: 1.0 To: 2.0) | Log
Audio.ReadFile("./data/Ode_to_Joy.ogg" From: 1.0 To: 4.0) | Log
Audio.Resample(OutRate: 16000) | Log
Audio.WriteFile("example-resampled.wav" SampleRate: 16000)
} Looped: true)

@schedule(main play-files)
Expand Down

0 comments on commit f9625a1

Please sign in to comment.