From 98844edec0c7c2f738b4f8413bfc45d97318b8c0 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 06:14:41 -0400 Subject: [PATCH 01/19] quickly disable new special case fir filter --- esphome/components/nabu/audio_resampler.cpp | 29 +++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index 4860c28..836da69 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -69,20 +69,21 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) { } constexpr float resample_rate = 16000.0f; if (stream_info.sample_rate != 16000) { - if (stream_info.sample_rate == 48000) { - // Special case, we can do this a lot faster with esp-dsp code! - const uint8_t decimation = 48000 / 16000; - const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1); - - int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH, - (float) 0.5 / decimation); - // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH); - dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH, - decimation, fir_out_offset, -shift); - this->decimation_filter_ = true; - this->needs_resampling_ = true; - // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t)); - } else { + // if (stream_info.sample_rate == 48000) { + // // Special case, we can do this a lot faster with esp-dsp code! 
+ // const uint8_t decimation = 48000 / 16000; + // const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1); + + // int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH, + // (float) 0.5 / decimation); + // // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH); + // dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH, + // decimation, fir_out_offset, -shift); + // this->decimation_filter_ = true; + // this->needs_resampling_ = true; + // // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t)); + // } else + { int flags = 0; this->needs_resampling_ = true; From a73759616a0547f9b2992c8b94a172338ddf032a Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 06:34:38 -0400 Subject: [PATCH 02/19] fix pipeline stopped vs playing logic --- esphome/components/nabu/audio_pipeline.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp index f95d270..6a77b75 100644 --- a/esphome/components/nabu/audio_pipeline.cpp +++ b/esphome/components/nabu/audio_pipeline.cpp @@ -88,7 +88,8 @@ AudioPipelineState AudioPipeline::get_state() { EventBits_t event_bits = xEventGroupGetBits(this->event_group_); if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) { return AudioPipelineState::STOPPED; - } else if (event_bits & (READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED)) { + } else if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) && + (event_bits & RESAMPLER_MESSAGE_FINISHED)) { return AudioPipelineState::STOPPED; } From 5db296381277da56032890f206794fdfb8206ae8 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 11:01:00 -0400 Subject: [PATCH 03/19] improve robustness of flac decoding 
--- esphome/components/nabu/audio_decoder.cpp | 25 ++++++----- esphome/components/nabu/flac_decoder.cpp | 52 +++++++++++------------ esphome/components/nabu/flac_decoder.h | 32 +++++++------- 3 files changed, 57 insertions(+), 52 deletions(-) diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp index 11720a5..d844c54 100644 --- a/esphome/components/nabu/audio_decoder.cpp +++ b/esphome/components/nabu/audio_decoder.cpp @@ -112,7 +112,7 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { // Try to decode more data // Shift unread data in input buffer to start - if ((this->input_buffer_length_ > 0) && (this->input_buffer_length_ < this->internal_buffer_size_)) { + if (this->input_buffer_length_ > 0) { memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_); } this->input_buffer_current_ = this->input_buffer_; @@ -123,8 +123,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { size_t bytes_read = 0; if (bytes_to_read > 0) { - uint8_t *new_mp3_data = this->input_buffer_ + this->input_buffer_length_; - bytes_read = this->input_ring_buffer_->read((void *) new_mp3_data, bytes_to_read); + uint8_t *new_audio_data = this->input_buffer_ + this->input_buffer_length_; + bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read); this->input_buffer_length_ += bytes_read; } @@ -153,6 +153,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { ++this->potentially_failed_count_; } else if (state == FileDecoderState::END_OF_FILE) { this->end_of_file_ = true; + } else if (state == FileDecoderState::FAILED) { + return AudioDecoderState::FAILED; } else { this->potentially_failed_count_ = 0; } @@ -191,9 +193,7 @@ FileDecoderState AudioDecoder::decode_wav_() { printf("sample channels: %d\n", this->channels_.value()); printf("sample rate: %" PRId32 "\n", this->sample_rate_.value()); - // printf("number of samples: %d\n", - // 
this->wav_decoder_->chunk_bytes_left() / (this->channels_.value() * (this->bits_per_sample.value() - // / 8))); + printf("bits per sample: %d\n", this->sample_depth_.value()); this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left(); header_finished = true; } else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) { @@ -280,14 +280,14 @@ FileDecoderState AudioDecoder::decode_flac_() { // Header hasn't been read auto result = this->flac_decoder_->read_header(this->input_buffer_length_); - if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { - return FileDecoderState::POTENTIALLY_FAILED; - } - size_t bytes_consumed = this->flac_decoder_->get_bytes_index(); this->input_buffer_current_ += bytes_consumed; this->input_buffer_length_ = this->flac_decoder_->get_bytes_left(); + if (result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) { + return FileDecoderState::POTENTIALLY_FAILED; + } + if (result != flac::FLAC_DECODER_SUCCESS) { printf("failed to read flac header. Error: %d\n", result); return FileDecoderState::FAILED; @@ -303,7 +303,7 @@ FileDecoderState AudioDecoder::decode_flac_() { return FileDecoderState::FAILED; } - return FileDecoderState::MORE_TO_PROCESS; + return FileDecoderState::IDLE; } uint32_t output_samples = 0; @@ -312,9 +312,12 @@ FileDecoderState AudioDecoder::decode_flac_() { if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { // not an issue, just needs more data! + // TODO: How does this never happen? 
This may be related to not being able to find the sync word when using shorter + // input buffers return FileDecoderState::POTENTIALLY_FAILED; } else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { // Serious error, can't recover + printf("FLAC Decoder Error %d\n", result); return FileDecoderState::FAILED; } diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp index 976ab3d..460876f 100644 --- a/esphome/components/nabu/flac_decoder.cpp +++ b/esphome/components/nabu/flac_decoder.cpp @@ -14,29 +14,29 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) { this->buffer_index_ = 0; this->bytes_left_ = buffer_length; - if (this->out_of_data_) { - return FLAC_DECODER_ERROR_OUT_OF_DATA; - } + this->out_of_data_ = (buffer_length == 0); - // File must start with 'fLaC' - if (this->read_uint(32) != FLAC_MAGIC_NUMBER) { - return FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER; + if (!this->partial_header_read_) { + // File must start with 'fLaC' + if (this->read_uint(32) != FLAC_MAGIC_NUMBER) { + return FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER; + } } - // Read header blocks - bool last = false; - uint32_t type = 0; - uint32_t length = 0; + while (!this->partial_header_last_ || (this->partial_header_length_ > 0)) { + if (this->bytes_left_ == 0) { + // We'll try to finish reading it once more data is loaded + this->partial_header_read_ = true; + return FLAC_DECODER_HEADER_OUT_OF_DATA; + } - while (!last) { - if (this->out_of_data_) { - return FLAC_DECODER_ERROR_OUT_OF_DATA; + if (this->partial_header_length_ == 0) { + this->partial_header_last_ = this->read_uint(1) != 0; + this->partial_header_type_ = this->read_uint(7); + this->partial_header_length_ = this->read_uint(24); } - last = this->read_uint(1) != 0; - type = this->read_uint(7); - length = this->read_uint(24); - if (type == 0) { + if (this->partial_header_type_ == 0) { // Stream info block this->min_block_size_ = this->read_uint(16); this->max_block_size_ = 
this->read_uint(16); @@ -48,18 +48,19 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) { this->sample_depth_ = this->read_uint(5) + 1; this->num_samples_ = this->read_uint(36); this->read_uint(128); + + this->partial_header_length_ = 0; } else { // Variable block - for (uint32_t i = 0; i < length; i++) { - this->read_uint(8); - - // Exit early if we run out of data here - if (this->out_of_data_) { - return FLAC_DECODER_ERROR_OUT_OF_DATA; + while (this->partial_header_length_ > 0) { + if (this->bytes_left_ == 0) { + break; } - } // for each byte in block + this->read_uint(8); + --this->partial_header_length_; + } } // variable block - } // while not last + } if ((this->sample_rate_ == 0) || (this->num_channels_ == 0) || (this->sample_depth_ == 0) || (this->max_block_size_ == 0)) { @@ -386,7 +387,6 @@ uint32_t FLACDecoder::read_uint(std::size_t num_bits) { while (this->bit_buffer_length_ < num_bits) { uint8_t next_byte = this->buffer_[this->buffer_index_]; this->buffer_index_++; - this->buffer_total_read_++; this->bytes_left_--; if (this->bytes_left_ == 0) { this->out_of_data_ = true; diff --git a/esphome/components/nabu/flac_decoder.h b/esphome/components/nabu/flac_decoder.h index 0d050ad..ef2ec64 100644 --- a/esphome/components/nabu/flac_decoder.h +++ b/esphome/components/nabu/flac_decoder.h @@ -27,16 +27,17 @@ const static uint32_t FLAC_UINT_MASK[] = { enum FLACDecoderResult { FLAC_DECODER_SUCCESS = 0, FLAC_DECODER_NO_MORE_FRAMES = 1, - FLAC_DECODER_ERROR_OUT_OF_DATA = 2, - FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER = 3, - FLAC_DECODER_ERROR_SYNC_NOT_FOUND = 4, - FLAC_DECODER_ERROR_BAD_BLOCK_SIZE_CODE = 5, - FLAC_DECODER_ERROR_BAD_HEADER = 6, - FLAC_DECODER_ERROR_RESERVED_CHANNEL_ASSIGNMENT = 7, - FLAC_DECODER_ERROR_RESERVED_SUBFRAME_TYPE = 8, - FLAC_DECODER_ERROR_BAD_FIXED_PREDICTION_ORDER = 9, - FLAC_DECODER_ERROR_RESERVED_RESIDUAL_CODING_METHOD = 10, - FLAC_DECODER_ERROR_BLOCK_SIZE_NOT_DIVISIBLE_RICE = 11, + FLAC_DECODER_HEADER_OUT_OF_DATA = 2, + 
FLAC_DECODER_ERROR_OUT_OF_DATA = 3, + FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER = 4, + FLAC_DECODER_ERROR_SYNC_NOT_FOUND = 5, + FLAC_DECODER_ERROR_BAD_BLOCK_SIZE_CODE = 6, + FLAC_DECODER_ERROR_BAD_HEADER = 7, + FLAC_DECODER_ERROR_RESERVED_CHANNEL_ASSIGNMENT = 8, + FLAC_DECODER_ERROR_RESERVED_SUBFRAME_TYPE = 9, + FLAC_DECODER_ERROR_BAD_FIXED_PREDICTION_ORDER = 10, + FLAC_DECODER_ERROR_RESERVED_RESIDUAL_CODING_METHOD = 11, + FLAC_DECODER_ERROR_BLOCK_SIZE_NOT_DIVISIBLE_RICE = 12, }; // Coefficients for fixed linear prediction @@ -52,8 +53,7 @@ class FLACDecoder { * buffer_size - size of the data buffer * min_buffer_size - min bytes in buffer before fill_buffer is called */ - FLACDecoder(uint8_t *buffer) - : buffer_(buffer) {} + FLACDecoder(uint8_t *buffer) : buffer_(buffer) {} ~FLACDecoder() { this->free_buffers(); } @@ -129,9 +129,6 @@ class FLACDecoder { /* Next index to read from the input buffer. */ std::size_t buffer_index_ = 0; - /* Total number of bytes read across frames (debugging). */ - std::size_t buffer_total_read_ = 0; - /* Number of byte that haven't been read from the input buffer yet. */ std::size_t bytes_left_ = 0; @@ -167,6 +164,11 @@ class FLACDecoder { /* Buffer of decoded samples at full precision (single channel). 
*/ std::vector> block_result_; + + bool partial_header_read_{false}; + bool partial_header_last_{false}; + uint32_t partial_header_type_{0}; + uint32_t partial_header_length_{0}; }; } // namespace flac From 228dc296f5dc681351cd973d74742b77fa0ca344 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 11:01:25 -0400 Subject: [PATCH 04/19] Increase http buffer size and allow setting output sample rate --- esphome/components/nabu/audio_pipeline.cpp | 20 ++++++++++--------- esphome/components/nabu/audio_pipeline.h | 8 +++++--- esphome/components/nabu/audio_resampler.cpp | 10 +++++----- esphome/components/nabu/audio_resampler.h | 2 +- esphome/components/nabu/media_player.py | 3 +++ esphome/components/nabu/nabu_media_player.cpp | 13 ++++++------ esphome/components/nabu/nabu_media_player.h | 2 ++ 7 files changed, 33 insertions(+), 25 deletions(-) diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp index 6a77b75..553739a 100644 --- a/esphome/components/nabu/audio_pipeline.cpp +++ b/esphome/components/nabu/audio_pipeline.cpp @@ -9,7 +9,7 @@ namespace nabu { static const size_t QUEUE_COUNT = 10; -static const size_t HTTP_BUFFER_SIZE = 32 * 1024; +static const size_t HTTP_BUFFER_SIZE = 128 * 1024; static const size_t BUFFER_SIZE_SAMPLES = 32768; static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t); @@ -50,21 +50,21 @@ AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type) this->event_group_ = xEventGroupCreate(); } -void AudioPipeline::start(const std::string &uri, const std::string &task_name, UBaseType_t priority) { - this->common_start_(task_name, priority); +void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) { + this->common_start_(target_sample_rate, task_name, priority); this->current_uri_ = uri; xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP); } -void 
AudioPipeline::start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority) { - this->common_start_(task_name, priority); +void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) { + this->common_start_(target_sample_rate, task_name, priority); this->current_media_file_ = media_file; xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_FILE); } -void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t priority) { +void AudioPipeline::common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) { if (this->read_task_handle_ == nullptr) { this->read_task_handle_ = xTaskCreateStatic(AudioPipeline::read_task_, (task_name + "_read").c_str(), 8192, (void *) this, priority, @@ -82,6 +82,8 @@ void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t prio } this->stop(); + + this->target_sample_rate_ = target_sample_rate; } AudioPipelineState AudioPipeline::get_state() { @@ -189,7 +191,7 @@ void AudioPipeline::decode_task_(void *params) { { AudioDecoder decoder = AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(), - this_pipeline->decoded_ring_buffer_.get(), BUFFER_SIZE_BYTES); + this_pipeline->decoded_ring_buffer_.get(), HTTP_BUFFER_SIZE);//BUFFER_SIZE_BYTES); decoder.start(this_pipeline->current_media_file_type_); bool has_stream_info = false; @@ -224,7 +226,7 @@ void AudioPipeline::decode_task_(void *params) { } // Block to give other tasks opportunity to run - delay(10); + delay(15); } } } @@ -257,7 +259,7 @@ void AudioPipeline::resample_task_(void *params) { AudioResampler resampler = AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES); - resampler.start(this_pipeline->current_stream_info_); + resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_); while (true) { event_bits = 
xEventGroupGetBits(this_pipeline->event_group_); diff --git a/esphome/components/nabu/audio_pipeline.h b/esphome/components/nabu/audio_pipeline.h index 4b1996f..c11dc05 100644 --- a/esphome/components/nabu/audio_pipeline.h +++ b/esphome/components/nabu/audio_pipeline.h @@ -40,8 +40,8 @@ class AudioPipeline { public: AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type); - void start(const std::string &uri, const std::string &task_name, UBaseType_t priority = 1); - void start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority = 1); + void start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1); + void start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1); void stop(); @@ -50,7 +50,9 @@ class AudioPipeline { void reset_ring_buffers(); protected: - void common_start_(const std::string &task_name, UBaseType_t priority); + void common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority); + + uint32_t target_sample_rate_; AudioMixer *mixer_; diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index 836da69..f8ea717 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -48,7 +48,7 @@ AudioResampler::~AudioResampler() { } } -void AudioResampler::start(media_player::StreamInfo &stream_info) { +void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) { this->stream_info_ = stream_info; this->input_buffer_current_ = this->input_buffer_; @@ -67,8 +67,8 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) { this->channel_factor_ = 2 / stream_info.channels; printf("Converting %d channels to 2 channels\n", stream_info.channels); } - constexpr float resample_rate = 16000.0f; - if (stream_info.sample_rate != 
16000) { + + if (stream_info.sample_rate != target_sample_rate) { // if (stream_info.sample_rate == 48000) { // // Special case, we can do this a lot faster with esp-dsp code! // const uint8_t decimation = 48000 / 16000; @@ -88,9 +88,9 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) { this->needs_resampling_ = true; - this->sample_ratio_ = resample_rate / static_cast(stream_info.sample_rate); + this->sample_ratio_ = static_cast(target_sample_rate) / static_cast(stream_info.sample_rate); - printf("Resampling from %d Hz to 16000 Hz\n", stream_info.sample_rate); + printf("Resampling from %d Hz to %d Hz\n", stream_info.sample_rate, target_sample_rate); if (this->sample_ratio_ < 1.0) { this->lowpass_ratio_ -= (10.24 / 16); diff --git a/esphome/components/nabu/audio_resampler.h b/esphome/components/nabu/audio_resampler.h index 1176c1a..176a094 100644 --- a/esphome/components/nabu/audio_resampler.h +++ b/esphome/components/nabu/audio_resampler.h @@ -30,7 +30,7 @@ class AudioResampler { size_t internal_buffer_samples); ~AudioResampler(); - void start(media_player::StreamInfo &stream_info); + void start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate); AudioResamplerState resample(bool stop_gracefully); diff --git a/esphome/components/nabu/media_player.py b/esphome/components/nabu/media_player.py index fb58299..7d7fe3b 100644 --- a/esphome/components/nabu/media_player.py +++ b/esphome/components/nabu/media_player.py @@ -46,6 +46,7 @@ TYPE_WEB = "web" CONF_FILES = "files" +CONF_SAMPLE_RATE = "sample_rate" nabu_ns = cg.esphome_ns.namespace("nabu") NabuMediaPlayer = nabu_ns.class_("NabuMediaPlayer") @@ -176,6 +177,7 @@ def _file_schema(value): cv.GenerateID(): cv.declare_id(NabuMediaPlayer), cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent), cv.Required(CONF_I2S_DOUT_PIN): pins.internal_gpio_output_pin_number, + cv.Optional(CONF_SAMPLE_RATE, default=16000): cv.int_range(min=1), cv.Optional(CONF_BITS_PER_SAMPLE, 
default="16bit"): cv.All( _validate_bits, cv.enum(BITS_PER_SAMPLE) ), @@ -203,6 +205,7 @@ async def to_code(config): await cg.register_parented(var, config[CONF_I2S_AUDIO_ID]) cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN])) cg.add(var.set_bits_per_sample(config[CONF_BITS_PER_SAMPLE])) + cg.add(var.set_sample_rate(config[CONF_SAMPLE_RATE])) if files_list := config.get(CONF_FILES): media_files = [] diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index f817d62..5995813 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -23,7 +23,6 @@ namespace nabu { // - Create a registry of media files in Python // - Add a yaml action to play a specific media file -static const size_t SAMPLE_RATE_HZ = 16000; // 16 kHz static const size_t QUEUE_COUNT = 20; static const size_t DMA_BUFFER_COUNT = 4; static const size_t DMA_BUFFER_SIZE = 512; @@ -148,7 +147,7 @@ static void stats_task(void *arg) { static const char *const TAG = "nabu_media_player"; void NabuMediaPlayer::setup() { - // xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); + xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); state = media_player::MEDIA_PLAYER_STATE_IDLE; @@ -212,7 +211,7 @@ void NabuMediaPlayer::speaker_task(void *params) { i2s_driver_config_t config = { .mode = (i2s_mode_t) (this_speaker->parent_->get_i2s_mode() | I2S_MODE_TX), - .sample_rate = 16000, + .sample_rate = this_speaker->sample_rate_, .bits_per_sample = this_speaker->bits_per_sample_, .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT, .communication_format = I2S_COMM_FORMAT_STAND_I2S, @@ -347,13 +346,13 @@ void NabuMediaPlayer::watch_media_commands_() { make_unique(this->audio_mixer_.get(), AudioPipelineType::ANNOUNCEMENT); } - this->announcement_pipeline_->start(this->announcement_url_.value(), "ann", 7); + 
this->announcement_pipeline_->start(this->announcement_url_.value(), this->sample_rate_, "ann", 7); } else { if (this->media_pipeline_ == nullptr) { this->media_pipeline_ = make_unique(this->audio_mixer_.get(), AudioPipelineType::MEDIA); } - this->media_pipeline_->start(this->media_url_.value(), "media", 2); + this->media_pipeline_->start(this->media_url_.value(), this->sample_rate_, "media", 2); if (this->is_paused_) { CommandEvent command_event; @@ -371,13 +370,13 @@ void NabuMediaPlayer::watch_media_commands_() { make_unique(this->audio_mixer_.get(), AudioPipelineType::ANNOUNCEMENT); } - this->announcement_pipeline_->start(this->announcement_file_.value(), "ann", 7); + this->announcement_pipeline_->start(this->announcement_file_.value(), this->sample_rate_, "ann", 7); } else { if (this->media_pipeline_ == nullptr) { this->media_pipeline_ = make_unique(this->audio_mixer_.get(), AudioPipelineType::MEDIA); } - this->media_pipeline_->start(this->media_file_.value(), "media", 2); + this->media_pipeline_->start(this->media_file_.value(), this->sample_rate_, "media", 5); if (this->is_paused_) { CommandEvent command_event; diff --git a/esphome/components/nabu/nabu_media_player.h b/esphome/components/nabu/nabu_media_player.h index 4ff4260..28a5530 100644 --- a/esphome/components/nabu/nabu_media_player.h +++ b/esphome/components/nabu/nabu_media_player.h @@ -57,6 +57,7 @@ class NabuMediaPlayer : public Component, void set_dout_pin(uint8_t pin) { this->dout_pin_ = pin; } void set_bits_per_sample(i2s_bits_per_sample_t bits_per_sample) { this->bits_per_sample_ = bits_per_sample; } + void set_sample_rate(uint32_t sample_rate) { this->sample_rate_ = sample_rate; } protected: // Receives commands from HA or from the voice assistant component @@ -101,6 +102,7 @@ class NabuMediaPlayer : public Component, QueueHandle_t speaker_command_queue_; i2s_bits_per_sample_t bits_per_sample_; + uint32_t sample_rate_; uint8_t dout_pin_{0}; bool is_paused_{false}; From 
5d51d2574fd01d982f77a7aa46384d53c3551ab5 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 13:24:30 -0400 Subject: [PATCH 05/19] fix flac sync errors after running out of data --- esphome/components/nabu/audio_decoder.cpp | 17 ++++++++++------- esphome/components/nabu/audio_pipeline.cpp | 4 ++-- esphome/components/nabu/flac_decoder.cpp | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp index d844c54..9a16d50 100644 --- a/esphome/components/nabu/audio_decoder.cpp +++ b/esphome/components/nabu/audio_decoder.cpp @@ -110,16 +110,21 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { } } else { // Try to decode more data + size_t bytes_available = this->input_ring_buffer_->available(); + size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_); + + if ((this->potentially_failed_count_ > 0) && (bytes_to_read == 0)) { + // We didn't have enough data last time, and we have no new data, so just return + return AudioDecoderState::DECODING; + } // Shift unread data in input buffer to start if (this->input_buffer_length_ > 0) { memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_); + this->input_buffer_current_ = this->input_buffer_; } - this->input_buffer_current_ = this->input_buffer_; // read in new ring buffer data to fill the remaining input buffer - size_t bytes_available = this->input_ring_buffer_->available(); - size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_); size_t bytes_read = 0; if (bytes_to_read > 0) { @@ -303,7 +308,7 @@ FileDecoderState AudioDecoder::decode_flac_() { return FileDecoderState::FAILED; } - return FileDecoderState::IDLE; + return FileDecoderState::MORE_TO_PROCESS; } uint32_t output_samples = 0; @@ -311,9 +316,7 @@ FileDecoderState AudioDecoder::decode_flac_() { 
this->flac_decoder_->decode_frame(this->input_buffer_length_, (int16_t *) this->output_buffer_, &output_samples); if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { - // not an issue, just needs more data! - // TODO: How does this never happen? This may be related to not being able to find the sync word when using shorter - // input buffers + // Not an issue, just needs more data that we'll get next time. return FileDecoderState::POTENTIALLY_FAILED; } else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { // Serious error, can't recover diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp index 553739a..a30e3e2 100644 --- a/esphome/components/nabu/audio_pipeline.cpp +++ b/esphome/components/nabu/audio_pipeline.cpp @@ -9,7 +9,7 @@ namespace nabu { static const size_t QUEUE_COUNT = 10; -static const size_t HTTP_BUFFER_SIZE = 128 * 1024; +static const size_t HTTP_BUFFER_SIZE = 64 * 1024; static const size_t BUFFER_SIZE_SAMPLES = 32768; static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t); @@ -226,7 +226,7 @@ void AudioPipeline::decode_task_(void *params) { } // Block to give other tasks opportunity to run - delay(15); + delay(10); } } } diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp index 460876f..a17440a 100644 --- a/esphome/components/nabu/flac_decoder.cpp +++ b/esphome/components/nabu/flac_decoder.cpp @@ -13,6 +13,8 @@ namespace flac { FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) { this->buffer_index_ = 0; this->bytes_left_ = buffer_length; + this->bit_buffer_ = 0; + this->bit_buffer_length_ = 0; this->out_of_data_ = (buffer_length == 0); @@ -74,6 +76,8 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) { FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *output_buffer, uint32_t *num_samples) { this->buffer_index_ = 0; this->bytes_left_ = buffer_length; + this->out_of_data_ = 
false; + *num_samples = 0; if (!this->block_samples_) { @@ -87,6 +91,9 @@ FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *outpu return FLAC_DECODER_NO_MORE_FRAMES; } + uint64_t previous_bit_buffer = this->bit_buffer_; + uint32_t previous_bit_buffer_length = this->bit_buffer_length_; + // sync code if (this->read_uint(14) != 0x3FFE) { return FLAC_DECODER_ERROR_SYNC_NOT_FOUND; @@ -108,6 +115,8 @@ FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *outpu next_int = (next_int << 1) & 0xFF; if (this->out_of_data_) { + this->bit_buffer_ = previous_bit_buffer; + this->bit_buffer_length_ = previous_bit_buffer_length; return FLAC_DECODER_ERROR_OUT_OF_DATA; } } @@ -140,6 +149,12 @@ FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *outpu this->decode_subframes(block_size, this->sample_depth_, channel_assignment); *num_samples = block_size * this->num_channels_; + if (this->bytes_left_ < 2) { + this->bit_buffer_ = previous_bit_buffer; + this->bit_buffer_length_ = previous_bit_buffer_length; + return FLAC_DECODER_ERROR_OUT_OF_DATA; + } + // Footer this->align_to_byte(); this->read_uint(16); From ea2b1878a20a63888d7a80c1f3999358f2dc16e3 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 13:24:38 -0400 Subject: [PATCH 06/19] limit http transfer size per loop --- esphome/components/nabu/audio_reader.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp index 915a02f..ea5725a 100644 --- a/esphome/components/nabu/audio_reader.cpp +++ b/esphome/components/nabu/audio_reader.cpp @@ -7,6 +7,8 @@ namespace esphome { namespace nabu { + static const size_t MAX_HTTP_BYTES_TO_READ_PER_ITERATION = 4096; + AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) { this->output_ring_buffer_ = output_ring_buffer; @@ -101,6 +103,7 @@ AudioReaderState AudioReader::read() { 
AudioReaderState AudioReader::file_read_() { if (this->media_file_bytes_left_ > 0) { size_t bytes_to_write = std::min(this->media_file_bytes_left_, this->output_ring_buffer_->free()); + bytes_to_write = std::min(bytes_to_write, MAX_HTTP_BYTES_TO_READ_PER_ITERATION); if (bytes_to_write == 0) { return AudioReaderState::READING; From 830f56caf8eb532ffcb7aaddc5e8a74cf49296a4 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 13:25:53 -0400 Subject: [PATCH 07/19] disable task debug logging --- esphome/components/nabu/nabu_media_player.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 5995813..616777e 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -147,7 +147,7 @@ static void stats_task(void *arg) { static const char *const TAG = "nabu_media_player"; void NabuMediaPlayer::setup() { - xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); + // xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); state = media_player::MEDIA_PLAYER_STATE_IDLE; From 67f19a61733c5b050695126bb3fb9647362d53d2 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 2 Aug 2024 13:56:26 -0400 Subject: [PATCH 08/19] revert min http transfer size --- esphome/components/nabu/audio_reader.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp index ea5725a..915a02f 100644 --- a/esphome/components/nabu/audio_reader.cpp +++ b/esphome/components/nabu/audio_reader.cpp @@ -7,8 +7,6 @@ namespace esphome { namespace nabu { - static const size_t MAX_HTTP_BYTES_TO_READ_PER_ITERATION = 4096; - AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) { this->output_ring_buffer_ = output_ring_buffer; @@ 
-103,7 +101,6 @@ AudioReaderState AudioReader::read() { AudioReaderState AudioReader::file_read_() { if (this->media_file_bytes_left_ > 0) { size_t bytes_to_write = std::min(this->media_file_bytes_left_, this->output_ring_buffer_->free()); - bytes_to_write = std::min(bytes_to_write, MAX_HTTP_BYTES_TO_READ_PER_ITERATION); if (bytes_to_write == 0) { return AudioReaderState::READING; From 11cf4ee84b08583d14aeee3df5442f5e3a55060e Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 07:59:07 -0400 Subject: [PATCH 09/19] always reset input_buffer_current --- esphome/components/nabu/audio_decoder.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp index 9a16d50..7684fc8 100644 --- a/esphome/components/nabu/audio_decoder.cpp +++ b/esphome/components/nabu/audio_decoder.cpp @@ -121,8 +121,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { // Shift unread data in input buffer to start if (this->input_buffer_length_ > 0) { memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_); - this->input_buffer_current_ = this->input_buffer_; } + this->input_buffer_current_ = this->input_buffer_; // read in new ring buffer data to fill the remaining input buffer size_t bytes_read = 0; @@ -231,11 +231,10 @@ FileDecoderState AudioDecoder::decode_wav_() { this->wav_bytes_left_ -= bytes_to_write; } - return FileDecoderState::MORE_TO_PROCESS; + return FileDecoderState::IDLE; } return FileDecoderState::END_OF_FILE; - // return DecoderState::FINISHED; } FileDecoderState AudioDecoder::decode_mp3_() { From dddf4dedcca50145f09c47c9e73ec585efb4128d Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 08:08:19 -0400 Subject: [PATCH 10/19] update TODOs and describe media player framework --- esphome/components/nabu/nabu_media_player.cpp | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git 
a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 616777e..017a0bf 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -14,14 +14,62 @@ namespace esphome { namespace nabu { // TODO: +// - Have better logging outputs +// - Output file type and stream information + any resampling processes +// - Remove printf +// - Log which part of an audio pipeline has an error +// - Verify we can handle the audio stream (no more than 2 channels, etc...) and abort if not +// - Add I2C getting of mute status on boot +// - Clear mixer input buffer if an audio tream is stopped +// - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise) // - Tune task memory requirements and potentially buffer sizes if issues appear -// - Biquad filters work for downsampling without handling float buffer carefully, upsampling will require some care // - Ducking improvements // - Ducking ratio probably isn't the best way to specify, as volume perception is not linear // - Add a YAML action for setting the ducking level instead of requiring a lambda // - Clean up process around playing back local media files // - Create a registry of media files in Python +// - What do I need to give them an ESPHome id? // - Add a yaml action to play a specific media file +// +// +// Framework: +// - Media player that can handle two streams; one for media and one for announcements +// - If played together, they are mixed with the announcement stream staying at full volume +// - The media audio can be further ducked via the ``set_ducking_ratio`` function +// - Each stream is handled by an ``AudioPipeline`` object with three parts/tasks +// - ``AudioReader`` handles reading from an HTTP source or from a PROGMEM flash set at compile time +// - ``AudioDecoder`` handles decoding the audio file. 
All formats are limited to two channels and 16 bits per sample +// - FLAC +// - WAV +// - MP3 (based on the libhelix decoder - a random mp3 file may be incompatible) +// - ``AudioResampler`` handles converting the sample rate to the configured output sample rate and converting mono +// to stereo +// - The quality is not good, and it is slow! Please send audio at the configured sample rate to avoid these issues +// - Each task will always run once started, but they will not do anything until they are needed +// - FreeRTOS Event Groups make up the inter-task communication +// - The ``AudioPipeline`` sets up an output ring buffer for the Reader and Decoder parts. The next part/task +// automatically pulls from the previous ring buffer +// - The streams are mixed together in the ``AudioMixer`` task +// - Each stream has a corresponding input buffer that the ``AudioResampler`` feeds directly +// - Pausing the media stream is done here +// - Media stream ducking is done here +// - The output ring buffer feeds the ``speaker_task`` directly. It is kept small intentionally to avoid latency when +// pausing +// - Audio output is handled by the ``speaker_task``. It configures the I2S bus and copies audio from the mixer's +// output ring buffer to the DMA buffers +// - Media player commands are received by the ``control`` function. The commands are added to the +// ``media_control_command_queue_`` to be processed in the component's loop +// - Starting a stream initializes the appropriate pipeline or stops it if it is already running +// - Volume and mute commands are achieved by the ``mute``, ``unmute``, ``set_volume`` functions. They communicate +// directly with the DAC over I2C. +// - Volume commands are ignored if the media control queue is full to avoid crashing when the track wheel is spun +// fast +// - Pausing is sent to the ``AudioMixer`` task. It only affects the media stream. 
+// - The component's main loop performs housekeeping: +// - It reads the media control queue and processes it directly +// - It watches the state of speaker and mixer tasks +// - It determines the overall state of the media player by considering the state of each pipeline +// - announcement playback takes highest priority static const size_t QUEUE_COUNT = 20; static const size_t DMA_BUFFER_COUNT = 4; From 8a2c8bddf2a6a75cfbe779e6fcd5fcc6290e0973 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 08:58:01 -0400 Subject: [PATCH 11/19] read mute status from dac at setup --- esphome/components/nabu/nabu_media_player.cpp | 31 +++++++++++++++++-- esphome/components/nabu/nabu_media_player.h | 5 ++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 017a0bf..5cf0ed3 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -19,7 +19,6 @@ namespace nabu { // - Remove printf // - Log which part of an audio pipeline has an error // - Verify we can handle the audio stream (no more than 2 channels, etc...) 
and abort if not -// - Add I2C getting of mute status on boot // - Clear mixer input buffer if an audio tream is stopped // - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise) // - Tune task memory requirements and potentially buffer sizes if issues appear @@ -214,7 +213,10 @@ void NabuMediaPlayer::setup() { xTaskCreate(NabuMediaPlayer::speaker_task, "speaker_task", 3072, (void *) this, 23, &this->speaker_task_handle_); - this->get_dac_volume_(); + if (!this->get_dac_volume_().has_value() || !this->get_dac_mute_().has_value()) { + ESP_LOGE(TAG, "Couldn't communicate with DAC"); + this->mark_failed(); + } // if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, 0x01)) { // ESP_LOGE(TAG, "DAC failed to switch register page"); @@ -703,6 +705,31 @@ bool NabuMediaPlayer::set_volume_(float volume, bool publish) { return true; } +optional NabuMediaPlayer::get_dac_mute_(bool publish) { + if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, DAC_MUTE_PAGE)) { + ESP_LOGE(TAG, "DAC failed to switch to mute page registers"); + return {}; + } + + uint8_t dac_mute_left = 0; + uint8_t dac_mute_right = 0; + if (!this->read_byte(DAC_LEFT_MUTE_REGISTER, &dac_mute_left) || + !this->read_byte(DAC_RIGHT_MUTE_REGISTER, &dac_mute_right)) { + ESP_LOGE(TAG, "DAC failed to read mute status"); + return {}; + } + + bool is_muted = false; + if (dac_mute_left == DAC_MUTE_COMMAND && dac_mute_right == DAC_MUTE_COMMAND) { + is_muted = true; + } + + if (publish) { + this->is_muted_ = is_muted; + } + return is_muted; +} + bool NabuMediaPlayer::mute_() { if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, DAC_MUTE_PAGE)) { ESP_LOGE(TAG, "DAC failed to switch to mute page registers"); diff --git a/esphome/components/nabu/nabu_media_player.h b/esphome/components/nabu/nabu_media_player.h index 28a5530..149ee6b 100644 --- a/esphome/components/nabu/nabu_media_player.h +++ b/esphome/components/nabu/nabu_media_player.h @@ -64,9 +64,12 @@ class NabuMediaPlayer : public 
Component, // Sends commands to the media_control_commanda_queue_ void control(const media_player::MediaPlayerCall &call) override; - /// @return volume read from DAC between 0.0 and 1.0, if successful + /// @return Volume read from DAC between 0.0 and 1.0, if successful. Updates volume_ if publish is true. optional get_dac_volume_(bool publish = true); + /// @return Mute status read from DAC, if successful. Updates is_muted_ if publish is true. + optional get_dac_mute_(bool publish = true); + /// @return true if I2C writes were successful bool set_volume_(float volume, bool publish = true); From 75d7462c2bda3b104dcef922f4e455da873d5246 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 09:05:17 -0400 Subject: [PATCH 12/19] raise error if bits per sample is too high for our optimized version --- esphome/components/nabu/flac_decoder.cpp | 6 ++++++ esphome/components/nabu/flac_decoder.h | 1 + 2 files changed, 7 insertions(+) diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp index a17440a..8fe0bea 100644 --- a/esphome/components/nabu/flac_decoder.cpp +++ b/esphome/components/nabu/flac_decoder.cpp @@ -69,6 +69,12 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) { return FLAC_DECODER_ERROR_BAD_HEADER; } + if (this->sample_depth_ > 16) { + // This decoder can support higher sample depths, but it would require using int32s throughout. 
We limit to 16 bits + // per sample for the sake of speed, as we can then use a quick esp-dsp function for the dot product calculation + return FLAC_DECODER_ERROR_UNSUPPORTED_BITS_PER_SAMPLE; + } + // Successfully read header return FLAC_DECODER_SUCCESS; } // read_header diff --git a/esphome/components/nabu/flac_decoder.h b/esphome/components/nabu/flac_decoder.h index ef2ec64..4ec33d6 100644 --- a/esphome/components/nabu/flac_decoder.h +++ b/esphome/components/nabu/flac_decoder.h @@ -38,6 +38,7 @@ enum FLACDecoderResult { FLAC_DECODER_ERROR_BAD_FIXED_PREDICTION_ORDER = 10, FLAC_DECODER_ERROR_RESERVED_RESIDUAL_CODING_METHOD = 11, FLAC_DECODER_ERROR_BLOCK_SIZE_NOT_DIVISIBLE_RICE = 12, + FLAC_DECODER_ERROR_UNSUPPORTED_BITS_PER_SAMPLE = 13, }; // Coefficients for fixed linear prediction From 47eb168371cace0f73a7b2732fef79bc0ce6ac5f Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 09:20:39 -0400 Subject: [PATCH 13/19] block unprocessable streams --- esphome/components/nabu/audio_pipeline.cpp | 17 ++++++++++++----- esphome/components/nabu/audio_resampler.cpp | 9 ++++++++- esphome/components/nabu/audio_resampler.h | 6 +++++- esphome/components/nabu/nabu_media_player.cpp | 3 +-- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp index a30e3e2..9c52fc4 100644 --- a/esphome/components/nabu/audio_pipeline.cpp +++ b/esphome/components/nabu/audio_pipeline.cpp @@ -50,14 +50,16 @@ AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type) this->event_group_ = xEventGroupCreate(); } -void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) { +void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, + UBaseType_t priority) { this->common_start_(target_sample_rate, task_name, priority); this->current_uri_ = uri; 
xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP); } -void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) { +void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate, + const std::string &task_name, UBaseType_t priority) { this->common_start_(target_sample_rate, task_name, priority); this->current_media_file_ = media_file; @@ -190,8 +192,9 @@ void AudioPipeline::decode_task_(void *params) { xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED); { - AudioDecoder decoder = AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(), - this_pipeline->decoded_ring_buffer_.get(), HTTP_BUFFER_SIZE);//BUFFER_SIZE_BYTES); + AudioDecoder decoder = + AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(), this_pipeline->decoded_ring_buffer_.get(), + HTTP_BUFFER_SIZE); // BUFFER_SIZE_BYTES); decoder.start(this_pipeline->current_media_file_type_); bool has_stream_info = false; @@ -259,7 +262,11 @@ void AudioPipeline::resample_task_(void *params) { AudioResampler resampler = AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES); - resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_); + if (!resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_)) { + // Unsupported incoming audio stream + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + } while (true) { event_bits = xEventGroupGetBits(this_pipeline->event_group_); diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index f8ea717..1cf2765 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -48,7 +48,7 @@ AudioResampler::~AudioResampler() { } } -void 
AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) { +bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) { this->stream_info_ = stream_info; this->input_buffer_current_ = this->input_buffer_; @@ -63,6 +63,11 @@ void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe this->needs_mono_to_stereo_ = (stream_info.channels != 2); + if ((stream_info.channels > 2) || (stream_info_.bits_per_sample != 16)) { + // TODO: Make these values configurable + return false; + } + if (stream_info.channels > 0) { this->channel_factor_ = 2 / stream_info.channels; printf("Converting %d channels to 2 channels\n", stream_info.channels); @@ -138,6 +143,8 @@ void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe } else { this->needs_resampling_ = false; } + + return true; } AudioResamplerState AudioResampler::resample(bool stop_gracefully) { diff --git a/esphome/components/nabu/audio_resampler.h b/esphome/components/nabu/audio_resampler.h index 176a094..aa36086 100644 --- a/esphome/components/nabu/audio_resampler.h +++ b/esphome/components/nabu/audio_resampler.h @@ -30,7 +30,11 @@ class AudioResampler { size_t internal_buffer_samples); ~AudioResampler(); - void start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate); + /// @brief Sets up the various bits necessary to resample + /// @param stream_info the incoming sample rate, bits per sample, and number of channels + /// @param target_sample_rate the necessary sample rate to convert to + /// @return True if it can convert the incoming stream, false otherwise + bool start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate); AudioResamplerState resample(bool stop_gracefully); diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 5cf0ed3..ee9fac0 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ 
b/esphome/components/nabu/nabu_media_player.cpp @@ -18,8 +18,7 @@ namespace nabu { // - Output file type and stream information + any resampling processes // - Remove printf // - Log which part of an audio pipeline has an error -// - Verify we can handle the audio stream (no more than 2 channels, etc...) and abort if not -// - Clear mixer input buffer if an audio tream is stopped +// - Clear mixer input buffer if an audio stream is stopped // - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise) // - Tune task memory requirements and potentially buffer sizes if issues appear // - Ducking improvements From fc1a5682ca8db809abef9e35cad32e8aac0a997b Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 09:29:51 -0400 Subject: [PATCH 14/19] clear appropriate mixer buffer when stopping a pipeline --- esphome/components/nabu/audio_pipeline.cpp | 9 +++++++++ esphome/components/nabu/nabu_media_player.cpp | 1 - 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp index 9c52fc4..92deb75 100644 --- a/esphome/components/nabu/audio_pipeline.cpp +++ b/esphome/components/nabu/audio_pipeline.cpp @@ -110,6 +110,15 @@ void AudioPipeline::stop() { true, // Wait for all the bits, pdMS_TO_TICKS(200)); // Block temporarily before deleting each task + // Clear the ring buffer in the mixer; avoids playing incorrect audio when starting a new file while paused + CommandEvent command_event; + if (this->pipeline_type_ == AudioPipelineType::MEDIA) { + command_event.command = CommandEventType::CLEAR_MEDIA; + } else { + command_event.command = CommandEventType::CLEAR_ANNOUNCEMENT; + } + this->mixer_->send_command(&command_event); + xEventGroupClearBits(this->event_group_, ALL_BITS); this->reset_ring_buffers(); } diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index ee9fac0..49147db 100644 
--- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -18,7 +18,6 @@ namespace nabu { // - Output file type and stream information + any resampling processes // - Remove printf // - Log which part of an audio pipeline has an error -// - Clear mixer input buffer if an audio stream is stopped // - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise) // - Tune task memory requirements and potentially buffer sizes if issues appear // - Ducking improvements From 2fa940b5a3c233aaf370c397f7ee7edbf96e7eee Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 09:48:16 -0400 Subject: [PATCH 15/19] log which pipeline element has an error --- esphome/components/nabu/audio_pipeline.cpp | 19 +++++++++++- esphome/components/nabu/audio_pipeline.h | 7 ++--- esphome/components/nabu/nabu_media_player.cpp | 29 +++++++++++++++---- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp index 92deb75..e9520b1 100644 --- a/esphome/components/nabu/audio_pipeline.cpp +++ b/esphome/components/nabu/audio_pipeline.cpp @@ -92,7 +92,24 @@ AudioPipelineState AudioPipeline::get_state() { EventBits_t event_bits = xEventGroupGetBits(this->event_group_); if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) { return AudioPipelineState::STOPPED; - } else if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) && + } + + if ((event_bits & READER_MESSAGE_ERROR)) { + xEventGroupClearBits(this->event_group_, READER_MESSAGE_ERROR); + return AudioPipelineState::ERROR_READING; + } + + if ((event_bits & DECODER_MESSAGE_ERROR)) { + xEventGroupClearBits(this->event_group_, DECODER_MESSAGE_ERROR); + return AudioPipelineState::ERROR_DECODING; + } + + if ((event_bits & RESAMPLER_MESSAGE_ERROR)) { + xEventGroupClearBits(this->event_group_, 
RESAMPLER_MESSAGE_ERROR); + return AudioPipelineState::ERROR_RESAMPLING; + } + + if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) && (event_bits & RESAMPLER_MESSAGE_FINISHED)) { return AudioPipelineState::STOPPED; } diff --git a/esphome/components/nabu/audio_pipeline.h b/esphome/components/nabu/audio_pipeline.h index c11dc05..20a8d91 100644 --- a/esphome/components/nabu/audio_pipeline.h +++ b/esphome/components/nabu/audio_pipeline.h @@ -28,12 +28,11 @@ enum class AudioPipelineType : uint8_t { }; enum class AudioPipelineState : uint8_t { - STARTING, - STARTED, PLAYING, - PAUSED, - STOPPING, STOPPED, + ERROR_READING, + ERROR_DECODING, + ERROR_RESAMPLING, }; class AudioPipeline { diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 49147db..28bdec9 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -17,7 +17,6 @@ namespace nabu { // - Have better logging outputs // - Output file type and stream information + any resampling processes // - Remove printf -// - Log which part of an audio pipeline has an error // - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise) // - Tune task memory requirements and potentially buffer sizes if issues appear // - Ducking improvements @@ -452,7 +451,7 @@ void NabuMediaPlayer::watch_media_commands_() { this->is_paused_ = false; break; case media_player::MEDIA_PLAYER_COMMAND_PAUSE: - if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) { + if (!this->is_paused_) { command_event.command = CommandEventType::PAUSE_MEDIA; this->audio_mixer_->send_command(&command_event); } @@ -557,12 +556,31 @@ void NabuMediaPlayer::loop() { if (this->announcement_pipeline_ != nullptr) this->announcement_pipeline_state_ = this->announcement_pipeline_->get_state(); + + if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_READING) { + 
ESP_LOGE(TAG, "Encountered an error reading the announcement file"); + } + if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_DECODING) { + ESP_LOGE(TAG, "Encountered an error decoding the announcement file"); + } + if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) { + ESP_LOGE(TAG, "Encountered an error resampling the announcement file"); + } if (this->media_pipeline_ != nullptr) this->media_pipeline_state_ = this->media_pipeline_->get_state(); - if ((this->announcement_pipeline_state_ != AudioPipelineState::STOPPING) && - (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED)) { + if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) { + ESP_LOGE(TAG, "Encountered an error reading the media file"); + } + if (this->media_pipeline_state_ == AudioPipelineState::ERROR_DECODING) { + ESP_LOGE(TAG, "Encountered an error decoding the media file"); + } + if (this->media_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) { + ESP_LOGE(TAG, "Encountered an error resampling the media file"); + } + + if (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED) { this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING; if (this->is_idle_muted_ && !this->is_muted_) { // this->unmute_(); @@ -576,8 +594,7 @@ void NabuMediaPlayer::loop() { // this->mute_(); this->is_idle_muted_ = true; } - } else if ((this->media_pipeline_state_ == AudioPipelineState::STOPPING) || - (this->media_pipeline_state_ == AudioPipelineState::STOPPED)) { + } else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) { this->state = media_player::MEDIA_PLAYER_STATE_IDLE; if (!this->is_idle_muted_) { // this->mute_(); From cb2b5ab60f85cb3cdde8525617b051e9bc37f20d Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 10:07:56 -0400 Subject: [PATCH 16/19] never transfer more input samples than can be processed in 1 step --- esphome/components/nabu/audio_resampler.cpp | 16 ++++++++++++---- 1 file 
changed, 12 insertions(+), 4 deletions(-) diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index 1cf2765..810a409 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -173,6 +173,17 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { // Refill input buffer ////// + // Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input samples to load + // Mono to stereo -> cut in half + // Upsampling -> reduce by a factor of the ceiling of sample_ratio_ + + size_t max_input_samples = this->internal_buffer_samples_; + + max_input_samples /= this->stream_info_.channels; + + uint32_t upsampling_factor = std::ceil(this->sample_ratio_); + max_input_samples /= upsampling_factor; + // Move old data to the start of the buffer if (this->input_buffer_length_ > 0) { memmove((void *) this->input_buffer_, (void *) this->input_buffer_current_, this->input_buffer_length_); @@ -182,7 +193,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { // Copy new data to the end of the of the buffer size_t bytes_available = this->input_ring_buffer_->available(); size_t bytes_to_read = - std::min(bytes_available, this->internal_buffer_samples_ * sizeof(int16_t) - this->input_buffer_length_); + std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_); if (bytes_to_read > 0) { int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t); @@ -252,15 +263,12 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { size_t samples_read = this->input_buffer_length_ / sizeof(int16_t); - // This is inefficient! 
It reconverts any samples that weren't used in the previous resampling run for (int i = 0; i < samples_read; ++i) { this->float_input_buffer_[i] = static_cast(this->input_buffer_[i]) / 32768.0f; } size_t frames_read = samples_read / this->stream_info_.channels; - // The low pass filter seems to be causing glitches... probably because samples are repeated due to the above - // ineffeciency! if (this->pre_filter_) { for (int i = 0; i < this->stream_info_.channels; ++i) { biquad_apply_buffer(&this->lowpass_[i][0], this->float_input_buffer_ + i, frames_read, From 7e5869d04a7db9fef73fa3c77fda12cfb46f3481 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 11:46:34 -0400 Subject: [PATCH 17/19] correctly account for mono to stereo adjustment --- esphome/components/nabu/audio_resampler.cpp | 23 +++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index 810a409..0ffba43 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -79,7 +79,8 @@ bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe // const uint8_t decimation = 48000 / 16000; // const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1); - // int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH, + // int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) + // FIR_FILTER_LENGTH, // (float) 0.5 / decimation); // // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH); // dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH, @@ -87,7 +88,7 @@ bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe // this->decimation_filter_ = true; // this->needs_resampling_ = true; // // 
memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t)); - // } else + // } else { int flags = 0; @@ -173,14 +174,14 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { // Refill input buffer ////// - // Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input samples to load - // Mono to stereo -> cut in half - // Upsampling -> reduce by a factor of the ceiling of sample_ratio_ - + // Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input + // samples we transfer size_t max_input_samples = this->internal_buffer_samples_; - max_input_samples /= this->stream_info_.channels; - + // Mono to stereo -> cut in half + max_input_samples /= (2 / this->stream_info_.channels); + + // Upsampling -> reduce by a factor of the ceiling of sample_ratio_ uint32_t upsampling_factor = std::ceil(this->sample_ratio_); max_input_samples /= upsampling_factor; @@ -192,8 +193,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { // Copy new data to the end of the of the buffer size_t bytes_available = this->input_ring_buffer_->available(); - size_t bytes_to_read = - std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_); + size_t bytes_to_read = std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_); if (bytes_to_read > 0) { int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t); @@ -226,7 +226,8 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { } } else { // Interleaved stereo samples - // TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't mixed + // TODO: This doesn't sound correct! 
I need to use separate filters for each channel so the delay line isn't + // mixed size_t available_samples = this->input_buffer_length_ / sizeof(int16_t); for (int i = 0; i < available_samples / 2; ++i) { // split interleaved samples into two separate streams From ed74ac31582cadaa38185c4aabac4a5ad723b177 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Sun, 4 Aug 2024 15:40:03 -0400 Subject: [PATCH 18/19] check for upsampling before scaling max input samples --- esphome/components/nabu/audio_resampler.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index 0ffba43..ce740a4 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -181,9 +181,11 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) { // Mono to stereo -> cut in half max_input_samples /= (2 / this->stream_info_.channels); - // Upsampling -> reduce by a factor of the ceiling of sample_ratio_ - uint32_t upsampling_factor = std::ceil(this->sample_ratio_); - max_input_samples /= upsampling_factor; + if (this->sample_ratio_ > 1.0) { + // Upsampling -> reduce by a factor of the ceiling of sample_ratio_ + uint32_t upsampling_factor = std::ceil(this->sample_ratio_); + max_input_samples /= upsampling_factor; + } // Move old data to the start of the buffer if (this->input_buffer_length_ > 0) { From 9aefd4d4753c8135858a437d7030f1da8af9be03 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Mon, 5 Aug 2024 09:32:03 -0400 Subject: [PATCH 19/19] fix memory leak --- esphome/components/nabu/audio_resampler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp index ce740a4..0203cc6 100644 --- a/esphome/components/nabu/audio_resampler.cpp +++ b/esphome/components/nabu/audio_resampler.cpp @@ -46,6 +46,8 @@ AudioResampler::~AudioResampler() { 
resampleFree(this->resampler_); this->resampler_ = nullptr; } + + // dsps_fird_s16_aexx_free(&this->fir_filter_); } bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) { @@ -382,6 +384,7 @@ int8_t AudioResampler::generate_q15_fir_coefficients_(int16_t *fir_coeffs, const } free(fir_window); + free(float_coeffs); return shift; }