Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixes/Improvements for media player #37

Merged
merged 19 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions esphome/components/nabu/audio_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,21 +110,26 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
}
} else {
// Try to decode more data
size_t bytes_available = this->input_ring_buffer_->available();
size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);

if ((this->potentially_failed_count_ > 0) && (bytes_to_read == 0)) {
// We didn't have enough data last time, and we have no new data, so just return
return AudioDecoderState::DECODING;
}

// Shift unread data in input buffer to start
if ((this->input_buffer_length_ > 0) && (this->input_buffer_length_ < this->internal_buffer_size_)) {
if (this->input_buffer_length_ > 0) {
memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_);
}
this->input_buffer_current_ = this->input_buffer_;

// read in new ring buffer data to fill the remaining input buffer
size_t bytes_available = this->input_ring_buffer_->available();
size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);
size_t bytes_read = 0;

if (bytes_to_read > 0) {
uint8_t *new_mp3_data = this->input_buffer_ + this->input_buffer_length_;
bytes_read = this->input_ring_buffer_->read((void *) new_mp3_data, bytes_to_read);
uint8_t *new_audio_data = this->input_buffer_ + this->input_buffer_length_;
bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read);

this->input_buffer_length_ += bytes_read;
}
Expand Down Expand Up @@ -153,6 +158,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
++this->potentially_failed_count_;
} else if (state == FileDecoderState::END_OF_FILE) {
this->end_of_file_ = true;
} else if (state == FileDecoderState::FAILED) {
return AudioDecoderState::FAILED;
} else {
this->potentially_failed_count_ = 0;
}
Expand Down Expand Up @@ -191,9 +198,7 @@ FileDecoderState AudioDecoder::decode_wav_() {

printf("sample channels: %d\n", this->channels_.value());
printf("sample rate: %" PRId32 "\n", this->sample_rate_.value());
// printf("number of samples: %d\n",
// this->wav_decoder_->chunk_bytes_left() / (this->channels_.value() * (this->bits_per_sample.value()
// / 8)));
printf("bits per sample: %d\n", this->sample_depth_.value());
this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
header_finished = true;
} else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) {
Expand Down Expand Up @@ -226,11 +231,10 @@ FileDecoderState AudioDecoder::decode_wav_() {
this->wav_bytes_left_ -= bytes_to_write;
}

return FileDecoderState::MORE_TO_PROCESS;
return FileDecoderState::IDLE;
}

return FileDecoderState::END_OF_FILE;
// return DecoderState::FINISHED;
}

FileDecoderState AudioDecoder::decode_mp3_() {
Expand Down Expand Up @@ -280,14 +284,14 @@ FileDecoderState AudioDecoder::decode_flac_() {
// Header hasn't been read
auto result = this->flac_decoder_->read_header(this->input_buffer_length_);

if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
return FileDecoderState::POTENTIALLY_FAILED;
}

size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
this->input_buffer_current_ += bytes_consumed;
this->input_buffer_length_ = this->flac_decoder_->get_bytes_left();

if (result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
return FileDecoderState::POTENTIALLY_FAILED;
}

if (result != flac::FLAC_DECODER_SUCCESS) {
printf("failed to read flac header. Error: %d\n", result);
return FileDecoderState::FAILED;
Expand All @@ -311,10 +315,11 @@ FileDecoderState AudioDecoder::decode_flac_() {
this->flac_decoder_->decode_frame(this->input_buffer_length_, (int16_t *) this->output_buffer_, &output_samples);

if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
// not an issue, just needs more data!
// Not an issue, just needs more data that we'll get next time.
return FileDecoderState::POTENTIALLY_FAILED;
} else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
// Serious error, can't recover
printf("FLAC Decoder Error %d\n", result);
return FileDecoderState::FAILED;
}

Expand Down
56 changes: 46 additions & 10 deletions esphome/components/nabu/audio_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nabu {

static const size_t QUEUE_COUNT = 10;

static const size_t HTTP_BUFFER_SIZE = 32 * 1024;
static const size_t HTTP_BUFFER_SIZE = 64 * 1024;
static const size_t BUFFER_SIZE_SAMPLES = 32768;
static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);

Expand Down Expand Up @@ -50,21 +50,23 @@ AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type)
this->event_group_ = xEventGroupCreate();
}

void AudioPipeline::start(const std::string &uri, const std::string &task_name, UBaseType_t priority) {
this->common_start_(task_name, priority);
void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name,
UBaseType_t priority) {
this->common_start_(target_sample_rate, task_name, priority);

this->current_uri_ = uri;
xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP);
}

void AudioPipeline::start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority) {
this->common_start_(task_name, priority);
void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate,
const std::string &task_name, UBaseType_t priority) {
this->common_start_(target_sample_rate, task_name, priority);

this->current_media_file_ = media_file;
xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_FILE);
}

void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t priority) {
void AudioPipeline::common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
if (this->read_task_handle_ == nullptr) {
this->read_task_handle_ =
xTaskCreateStatic(AudioPipeline::read_task_, (task_name + "_read").c_str(), 8192, (void *) this, priority,
Expand All @@ -82,13 +84,33 @@ void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t prio
}

this->stop();

this->target_sample_rate_ = target_sample_rate;
}

AudioPipelineState AudioPipeline::get_state() {
EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) {
return AudioPipelineState::STOPPED;
} else if (event_bits & (READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED)) {
}

if ((event_bits & READER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, READER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_READING;
}

if ((event_bits & DECODER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, DECODER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_DECODING;
}

if ((event_bits & RESAMPLER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, RESAMPLER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_RESAMPLING;
}

if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) &&
(event_bits & RESAMPLER_MESSAGE_FINISHED)) {
return AudioPipelineState::STOPPED;
}

Expand All @@ -105,6 +127,15 @@ void AudioPipeline::stop() {
true, // Wait for all the bits,
pdMS_TO_TICKS(200)); // Block temporarily before deleting each task

// Clear the ring buffer in the mixer; avoids playing incorrect audio when starting a new file while paused
CommandEvent command_event;
if (this->pipeline_type_ == AudioPipelineType::MEDIA) {
command_event.command = CommandEventType::CLEAR_MEDIA;
} else {
command_event.command = CommandEventType::CLEAR_ANNOUNCEMENT;
}
this->mixer_->send_command(&command_event);

xEventGroupClearBits(this->event_group_, ALL_BITS);
this->reset_ring_buffers();
}
Expand Down Expand Up @@ -187,8 +218,9 @@ void AudioPipeline::decode_task_(void *params) {
xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED);

{
AudioDecoder decoder = AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(),
this_pipeline->decoded_ring_buffer_.get(), BUFFER_SIZE_BYTES);
AudioDecoder decoder =
AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(), this_pipeline->decoded_ring_buffer_.get(),
HTTP_BUFFER_SIZE); // BUFFER_SIZE_BYTES);
decoder.start(this_pipeline->current_media_file_type_);

bool has_stream_info = false;
Expand Down Expand Up @@ -256,7 +288,11 @@ void AudioPipeline::resample_task_(void *params) {
AudioResampler resampler =
AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES);

resampler.start(this_pipeline->current_stream_info_);
if (!resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_)) {
// Unsupported incoming audio stream
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
}

while (true) {
event_bits = xEventGroupGetBits(this_pipeline->event_group_);
Expand Down
15 changes: 8 additions & 7 deletions esphome/components/nabu/audio_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,19 @@ enum class AudioPipelineType : uint8_t {
};

enum class AudioPipelineState : uint8_t {
STARTING,
STARTED,
PLAYING,
PAUSED,
STOPPING,
STOPPED,
ERROR_READING,
ERROR_DECODING,
ERROR_RESAMPLING,
};

class AudioPipeline {
public:
AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type);

void start(const std::string &uri, const std::string &task_name, UBaseType_t priority = 1);
void start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority = 1);
void start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1);
void start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1);

void stop();

Expand All @@ -50,7 +49,9 @@ class AudioPipeline {
void reset_ring_buffers();

protected:
void common_start_(const std::string &task_name, UBaseType_t priority);
void common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority);

uint32_t target_sample_rate_;

AudioMixer *mixer_;

Expand Down
72 changes: 47 additions & 25 deletions esphome/components/nabu/audio_resampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@ AudioResampler::~AudioResampler() {
resampleFree(this->resampler_);
this->resampler_ = nullptr;
}

// dsps_fird_s16_aexx_free(&this->fir_filter_);
}

void AudioResampler::start(media_player::StreamInfo &stream_info) {
bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) {
this->stream_info_ = stream_info;

this->input_buffer_current_ = this->input_buffer_;
Expand All @@ -63,33 +65,40 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {

this->needs_mono_to_stereo_ = (stream_info.channels != 2);

if ((stream_info.channels > 2) || (stream_info_.bits_per_sample != 16)) {
// TODO: Make these values configurable
return false;
}

if (stream_info.channels > 0) {
this->channel_factor_ = 2 / stream_info.channels;
printf("Converting %d channels to 2 channels\n", stream_info.channels);
}
constexpr float resample_rate = 16000.0f;
if (stream_info.sample_rate != 16000) {
if (stream_info.sample_rate == 48000) {
// Special case, we can do this a lot faster with esp-dsp code!
const uint8_t decimation = 48000 / 16000;
const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1);

int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH,
(float) 0.5 / decimation);
// dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
decimation, fir_out_offset, -shift);
this->decimation_filter_ = true;
this->needs_resampling_ = true;
// memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
} else {

if (stream_info.sample_rate != target_sample_rate) {
// if (stream_info.sample_rate == 48000) {
// // Special case, we can do this a lot faster with esp-dsp code!
// const uint8_t decimation = 48000 / 16000;
// const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1);

// int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t)
// FIR_FILTER_LENGTH,
// (float) 0.5 / decimation);
// // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
// dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
// decimation, fir_out_offset, -shift);
// this->decimation_filter_ = true;
// this->needs_resampling_ = true;
// // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
// } else
{
int flags = 0;

this->needs_resampling_ = true;

this->sample_ratio_ = resample_rate / static_cast<float>(stream_info.sample_rate);
this->sample_ratio_ = static_cast<float>(target_sample_rate) / static_cast<float>(stream_info.sample_rate);

printf("Resampling from %d Hz to 16000 Hz\n", stream_info.sample_rate);
printf("Resampling from %d Hz to %d Hz\n", stream_info.sample_rate, target_sample_rate);

if (this->sample_ratio_ < 1.0) {
this->lowpass_ratio_ -= (10.24 / 16);
Expand Down Expand Up @@ -137,6 +146,8 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {
} else {
this->needs_resampling_ = false;
}

return true;
}

AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
Expand Down Expand Up @@ -165,6 +176,19 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
// Refill input buffer
//////

// Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input
// samples we transfer
size_t max_input_samples = this->internal_buffer_samples_;

// Mono to stereo -> cut in half
max_input_samples /= (2 / this->stream_info_.channels);

if (this->sample_ratio_ > 1.0) {
// Upsampling -> reduce by a factor of the ceiling of sample_ratio_
uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
max_input_samples /= upsampling_factor;
}

// Move old data to the start of the buffer
if (this->input_buffer_length_ > 0) {
memmove((void *) this->input_buffer_, (void *) this->input_buffer_current_, this->input_buffer_length_);
Expand All @@ -173,8 +197,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {

// Copy new data to the end of the buffer
size_t bytes_available = this->input_ring_buffer_->available();
size_t bytes_to_read =
std::min(bytes_available, this->internal_buffer_samples_ * sizeof(int16_t) - this->input_buffer_length_);
size_t bytes_to_read = std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_);

if (bytes_to_read > 0) {
int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t);
Expand Down Expand Up @@ -207,7 +230,8 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
}
} else {
// Interleaved stereo samples
// TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't mixed
// TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't
// mixed
size_t available_samples = this->input_buffer_length_ / sizeof(int16_t);
for (int i = 0; i < available_samples / 2; ++i) {
// split interleaved samples into two separate streams
Expand Down Expand Up @@ -244,15 +268,12 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {

size_t samples_read = this->input_buffer_length_ / sizeof(int16_t);

// This is inefficient! It reconverts any samples that weren't used in the previous resampling run
for (int i = 0; i < samples_read; ++i) {
this->float_input_buffer_[i] = static_cast<float>(this->input_buffer_[i]) / 32768.0f;
}

size_t frames_read = samples_read / this->stream_info_.channels;

// The low pass filter seems to be causing glitches... probably because samples are repeated due to the above
// inefficiency!
if (this->pre_filter_) {
for (int i = 0; i < this->stream_info_.channels; ++i) {
biquad_apply_buffer(&this->lowpass_[i][0], this->float_input_buffer_ + i, frames_read,
Expand Down Expand Up @@ -363,6 +384,7 @@ int8_t AudioResampler::generate_q15_fir_coefficients_(int16_t *fir_coeffs, const
}

free(fir_window);
free(float_coeffs);

return shift;
}
Expand Down
Loading