From 98844edec0c7c2f738b4f8413bfc45d97318b8c0 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 06:14:41 -0400
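Subject: [PATCH 01/19] quickly disable new special case fir filter

Comment out (rather than delete) the 48000 Hz esp-dsp decimation branch
in AudioResampler::start(). Every stream whose sample rate is not
16000 Hz now goes through the remaining general resampling branch.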
---
 esphome/components/nabu/audio_resampler.cpp | 29 +++++++++++----------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index 4860c28..836da69 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -69,20 +69,21 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {
   }
   constexpr float resample_rate = 16000.0f;
   if (stream_info.sample_rate != 16000) {
-    if (stream_info.sample_rate == 48000) {
-      // Special case, we can do this a lot faster with esp-dsp code!
-      const uint8_t decimation = 48000 / 16000;
-      const float fir_out_offset = 0;  //((FIR_FILTER_LENGTH / decimation / 2) - 1);
-
-      int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH,
-                                                          (float) 0.5 / decimation);
-      // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
-      dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
-                         decimation, fir_out_offset, -shift);
-      this->decimation_filter_ = true;
-      this->needs_resampling_ = true;
-      // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
-    } else {
+    // if (stream_info.sample_rate == 48000) {
+    //   // Special case, we can do this a lot faster with esp-dsp code!
+    //   const uint8_t decimation = 48000 / 16000;
+    //   const float fir_out_offset = 0;  //((FIR_FILTER_LENGTH / decimation / 2) - 1);
+
+    //   int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH,
+    //                                                       (float) 0.5 / decimation);
+    //   // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
+    //   dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
+    //                      decimation, fir_out_offset, -shift);
+    //   this->decimation_filter_ = true;
+    //   this->needs_resampling_ = true;
+    //   // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
+    // } else 
+    {
       int flags = 0;
 
       this->needs_resampling_ = true;

From a73759616a0547f9b2992c8b94a172338ddf032a Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 06:34:38 -0400
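Subject: [PATCH 02/19] fix pipeline stopped vs playing logic

AudioPipeline::get_state() reported STOPPED as soon as any one of the
READER/DECODER/RESAMPLER_MESSAGE_FINISHED bits was set. Report STOPPED
only when all three bits are set.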
---
 esphome/components/nabu/audio_pipeline.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index f95d270..6a77b75 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -88,7 +88,8 @@ AudioPipelineState AudioPipeline::get_state() {
   EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
   if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) {
     return AudioPipelineState::STOPPED;
-  } else if (event_bits & (READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED)) {
+  } else if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) &&
+             (event_bits & RESAMPLER_MESSAGE_FINISHED)) {
     return AudioPipelineState::STOPPED;
   }
 

From 5db296381277da56032890f206794fdfb8206ae8 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 11:01:00 -0400
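Subject: [PATCH 03/19] improve robustness of flac decoding

FLACDecoder::read_header() now keeps its progress through the metadata
blocks in new partial_header_* members and returns the new result
FLAC_DECODER_HEADER_OUT_OF_DATA when the input buffer is exhausted, so
a later call can continue without re-checking the magic number.

Adding that enumerator shifts the numeric value of every
FLAC_DECODER_ERROR_* result up by one.

AudioDecoder changes:
 - decode() returns AudioDecoderState::FAILED when the file decoder
   reports FileDecoderState::FAILED.
 - decode() shifts unread input to the start of the buffer whenever
   input_buffer_length_ > 0.
 - decode_flac_() advances the input buffer by the bytes the header
   reader consumed before it checks for the out-of-data result, and
   returns IDLE instead of MORE_TO_PROCESS once the header is read.
 - FLAC decoder errors and the WAV bits per sample are printed.

The unused buffer_total_read_ debugging counter is removed.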
---
 esphome/components/nabu/audio_decoder.cpp | 25 ++++++-----
 esphome/components/nabu/flac_decoder.cpp  | 52 +++++++++++------------
 esphome/components/nabu/flac_decoder.h    | 32 +++++++-------
 3 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp
index 11720a5..d844c54 100644
--- a/esphome/components/nabu/audio_decoder.cpp
+++ b/esphome/components/nabu/audio_decoder.cpp
@@ -112,7 +112,7 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
       // Try to decode more data
 
       // Shift unread data in input buffer to start
-      if ((this->input_buffer_length_ > 0) && (this->input_buffer_length_ < this->internal_buffer_size_)) {
+      if (this->input_buffer_length_ > 0) {
         memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_);
       }
       this->input_buffer_current_ = this->input_buffer_;
@@ -123,8 +123,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
       size_t bytes_read = 0;
 
       if (bytes_to_read > 0) {
-        uint8_t *new_mp3_data = this->input_buffer_ + this->input_buffer_length_;
-        bytes_read = this->input_ring_buffer_->read((void *) new_mp3_data, bytes_to_read);
+        uint8_t *new_audio_data = this->input_buffer_ + this->input_buffer_length_;
+        bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read);
 
         this->input_buffer_length_ += bytes_read;
       }
@@ -153,6 +153,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
       ++this->potentially_failed_count_;
     } else if (state == FileDecoderState::END_OF_FILE) {
       this->end_of_file_ = true;
+    } else if (state == FileDecoderState::FAILED) {
+      return AudioDecoderState::FAILED;
     } else {
       this->potentially_failed_count_ = 0;
     }
@@ -191,9 +193,7 @@ FileDecoderState AudioDecoder::decode_wav_() {
 
           printf("sample channels: %d\n", this->channels_.value());
           printf("sample rate: %" PRId32 "\n", this->sample_rate_.value());
-          // printf("number of samples: %d\n",
-          //        this->wav_decoder_->chunk_bytes_left() / (this->channels_.value() * (this->bits_per_sample.value()
-          //        / 8)));
+          printf("bits per sample: %d\n", this->sample_depth_.value());
           this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
           header_finished = true;
         } else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) {
@@ -280,14 +280,14 @@ FileDecoderState AudioDecoder::decode_flac_() {
     // Header hasn't been read
     auto result = this->flac_decoder_->read_header(this->input_buffer_length_);
 
-    if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
-      return FileDecoderState::POTENTIALLY_FAILED;
-    }
-
     size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
     this->input_buffer_current_ += bytes_consumed;
     this->input_buffer_length_ = this->flac_decoder_->get_bytes_left();
 
+    if (result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
+      return FileDecoderState::POTENTIALLY_FAILED;
+    }
+
     if (result != flac::FLAC_DECODER_SUCCESS) {
       printf("failed to read flac header. Error: %d\n", result);
       return FileDecoderState::FAILED;
@@ -303,7 +303,7 @@ FileDecoderState AudioDecoder::decode_flac_() {
       return FileDecoderState::FAILED;
     }
 
-    return FileDecoderState::MORE_TO_PROCESS;
+    return FileDecoderState::IDLE;
   }
 
   uint32_t output_samples = 0;
@@ -312,9 +312,12 @@ FileDecoderState AudioDecoder::decode_flac_() {
 
   if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
     // not an issue, just needs more data!
+    // TODO: How does this never happen? This may be related to not being able to find the sync word when using shorter
+    // input buffers
     return FileDecoderState::POTENTIALLY_FAILED;
   } else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
     // Serious error, can't recover
+    printf("FLAC Decoder Error %d\n", result);
     return FileDecoderState::FAILED;
   }
 
diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp
index 976ab3d..460876f 100644
--- a/esphome/components/nabu/flac_decoder.cpp
+++ b/esphome/components/nabu/flac_decoder.cpp
@@ -14,29 +14,29 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) {
   this->buffer_index_ = 0;
   this->bytes_left_ = buffer_length;
 
-  if (this->out_of_data_) {
-    return FLAC_DECODER_ERROR_OUT_OF_DATA;
-  }
+  this->out_of_data_ = (buffer_length == 0);
 
-  // File must start with 'fLaC'
-  if (this->read_uint(32) != FLAC_MAGIC_NUMBER) {
-    return FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER;
+  if (!this->partial_header_read_) {
+    // File must start with 'fLaC'
+    if (this->read_uint(32) != FLAC_MAGIC_NUMBER) {
+      return FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER;
+    }
   }
 
-  // Read header blocks
-  bool last = false;
-  uint32_t type = 0;
-  uint32_t length = 0;
+  while (!this->partial_header_last_ || (this->partial_header_length_ > 0)) {
+    if (this->bytes_left_ == 0) {
+      // We'll try to finish reading it once more data is loaded
+      this->partial_header_read_ = true;
+      return FLAC_DECODER_HEADER_OUT_OF_DATA;
+    }
 
-  while (!last) {
-    if (this->out_of_data_) {
-      return FLAC_DECODER_ERROR_OUT_OF_DATA;
+    if (this->partial_header_length_ == 0) {
+      this->partial_header_last_ = this->read_uint(1) != 0;
+      this->partial_header_type_ = this->read_uint(7);
+      this->partial_header_length_ = this->read_uint(24);
     }
 
-    last = this->read_uint(1) != 0;
-    type = this->read_uint(7);
-    length = this->read_uint(24);
-    if (type == 0) {
+    if (this->partial_header_type_ == 0) {
       // Stream info block
       this->min_block_size_ = this->read_uint(16);
       this->max_block_size_ = this->read_uint(16);
@@ -48,18 +48,19 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) {
       this->sample_depth_ = this->read_uint(5) + 1;
       this->num_samples_ = this->read_uint(36);
       this->read_uint(128);
+
+      this->partial_header_length_ = 0;
     } else {
       // Variable block
-      for (uint32_t i = 0; i < length; i++) {
-        this->read_uint(8);
-
-        // Exit early if we run out of data here
-        if (this->out_of_data_) {
-          return FLAC_DECODER_ERROR_OUT_OF_DATA;
+      while (this->partial_header_length_ > 0) {
+        if (this->bytes_left_ == 0) {
+          break;
         }
-      }  // for each byte in block
+        this->read_uint(8);
+        --this->partial_header_length_;
+      }
     }  // variable block
-  }  // while not last
+  }
 
   if ((this->sample_rate_ == 0) || (this->num_channels_ == 0) || (this->sample_depth_ == 0) ||
       (this->max_block_size_ == 0)) {
@@ -386,7 +387,6 @@ uint32_t FLACDecoder::read_uint(std::size_t num_bits) {
   while (this->bit_buffer_length_ < num_bits) {
     uint8_t next_byte = this->buffer_[this->buffer_index_];
     this->buffer_index_++;
-    this->buffer_total_read_++;
     this->bytes_left_--;
     if (this->bytes_left_ == 0) {
       this->out_of_data_ = true;
diff --git a/esphome/components/nabu/flac_decoder.h b/esphome/components/nabu/flac_decoder.h
index 0d050ad..ef2ec64 100644
--- a/esphome/components/nabu/flac_decoder.h
+++ b/esphome/components/nabu/flac_decoder.h
@@ -27,16 +27,17 @@ const static uint32_t FLAC_UINT_MASK[] = {
 enum FLACDecoderResult {
   FLAC_DECODER_SUCCESS = 0,
   FLAC_DECODER_NO_MORE_FRAMES = 1,
-  FLAC_DECODER_ERROR_OUT_OF_DATA = 2,
-  FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER = 3,
-  FLAC_DECODER_ERROR_SYNC_NOT_FOUND = 4,
-  FLAC_DECODER_ERROR_BAD_BLOCK_SIZE_CODE = 5,
-  FLAC_DECODER_ERROR_BAD_HEADER = 6,
-  FLAC_DECODER_ERROR_RESERVED_CHANNEL_ASSIGNMENT = 7,
-  FLAC_DECODER_ERROR_RESERVED_SUBFRAME_TYPE = 8,
-  FLAC_DECODER_ERROR_BAD_FIXED_PREDICTION_ORDER = 9,
-  FLAC_DECODER_ERROR_RESERVED_RESIDUAL_CODING_METHOD = 10,
-  FLAC_DECODER_ERROR_BLOCK_SIZE_NOT_DIVISIBLE_RICE = 11,
+  FLAC_DECODER_HEADER_OUT_OF_DATA = 2,
+  FLAC_DECODER_ERROR_OUT_OF_DATA = 3,
+  FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER = 4,
+  FLAC_DECODER_ERROR_SYNC_NOT_FOUND = 5,
+  FLAC_DECODER_ERROR_BAD_BLOCK_SIZE_CODE = 6,
+  FLAC_DECODER_ERROR_BAD_HEADER = 7,
+  FLAC_DECODER_ERROR_RESERVED_CHANNEL_ASSIGNMENT = 8,
+  FLAC_DECODER_ERROR_RESERVED_SUBFRAME_TYPE = 9,
+  FLAC_DECODER_ERROR_BAD_FIXED_PREDICTION_ORDER = 10,
+  FLAC_DECODER_ERROR_RESERVED_RESIDUAL_CODING_METHOD = 11,
+  FLAC_DECODER_ERROR_BLOCK_SIZE_NOT_DIVISIBLE_RICE = 12,
 };
 
 // Coefficients for fixed linear prediction
@@ -52,8 +53,7 @@ class FLACDecoder {
    * buffer_size - size of the data buffer
    * min_buffer_size - min bytes in buffer before fill_buffer is called
    */
-  FLACDecoder(uint8_t *buffer)
-      : buffer_(buffer) {}
+  FLACDecoder(uint8_t *buffer) : buffer_(buffer) {}
 
   ~FLACDecoder() { this->free_buffers(); }
 
@@ -129,9 +129,6 @@ class FLACDecoder {
   /* Next index to read from the input buffer. */
   std::size_t buffer_index_ = 0;
 
-  /* Total number of bytes read across frames (debugging). */
-  std::size_t buffer_total_read_ = 0;
-
   /* Number of byte that haven't been read from the input buffer yet. */
   std::size_t bytes_left_ = 0;
 
@@ -167,6 +164,11 @@ class FLACDecoder {
 
   /* Buffer of decoded samples at full precision (single channel). */
   std::vector<int16_t, esphome::ExternalRAMAllocator<int16_t>> block_result_;
+
+  bool partial_header_read_{false};
+  bool partial_header_last_{false};
+  uint32_t partial_header_type_{0};
+  uint32_t partial_header_length_{0};
 };
 
 }  // namespace flac

From 228dc296f5dc681351cd973d74742b77fa0ca344 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 11:01:25 -0400
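Subject: [PATCH 04/19] Increase http buffer size and allow setting output
 sample rate

Raise HTTP_BUFFER_SIZE from 32 KiB to 128 KiB and pass it to the
AudioDecoder constructor in place of BUFFER_SIZE_BYTES.

Add an optional `sample_rate` key (default 16000) to the media player
schema. The value is stored through NabuMediaPlayer::set_sample_rate(),
used for the I2S driver configuration, and handed to both
AudioPipeline::start() overloads, which forward it to
AudioResampler::start() as target_sample_rate. This replaces the
hard-coded 16000 Hz resampling target and the SAMPLE_RATE_HZ constant.

Also included in this patch:
 - the stats_task creation in NabuMediaPlayer::setup() is uncommented;
 - the delay() in the decode task loop goes from 10 to 15;
 - the media-file pipeline is started with priority 5 instead of 2
   (the media-URL pipeline stays at 2).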
---
 esphome/components/nabu/audio_pipeline.cpp    | 20 ++++++++++---------
 esphome/components/nabu/audio_pipeline.h      |  8 +++++---
 esphome/components/nabu/audio_resampler.cpp   | 10 +++++-----
 esphome/components/nabu/audio_resampler.h     |  2 +-
 esphome/components/nabu/media_player.py       |  3 +++
 esphome/components/nabu/nabu_media_player.cpp | 13 ++++++------
 esphome/components/nabu/nabu_media_player.h   |  2 ++
 7 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index 6a77b75..553739a 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -9,7 +9,7 @@ namespace nabu {
 
 static const size_t QUEUE_COUNT = 10;
 
-static const size_t HTTP_BUFFER_SIZE = 32 * 1024;
+static const size_t HTTP_BUFFER_SIZE = 128 * 1024;
 static const size_t BUFFER_SIZE_SAMPLES = 32768;
 static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);
 
@@ -50,21 +50,21 @@ AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type)
   this->event_group_ = xEventGroupCreate();
 }
 
-void AudioPipeline::start(const std::string &uri, const std::string &task_name, UBaseType_t priority) {
-  this->common_start_(task_name, priority);
+void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
+  this->common_start_(target_sample_rate, task_name, priority);
 
   this->current_uri_ = uri;
   xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP);
 }
 
-void AudioPipeline::start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority) {
-  this->common_start_(task_name, priority);
+void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
+  this->common_start_(target_sample_rate, task_name, priority);
 
   this->current_media_file_ = media_file;
   xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_FILE);
 }
 
-void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t priority) {
+void AudioPipeline::common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
   if (this->read_task_handle_ == nullptr) {
     this->read_task_handle_ =
         xTaskCreateStatic(AudioPipeline::read_task_, (task_name + "_read").c_str(), 8192, (void *) this, priority,
@@ -82,6 +82,8 @@ void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t prio
   }
 
   this->stop();
+
+  this->target_sample_rate_ = target_sample_rate;
 }
 
 AudioPipelineState AudioPipeline::get_state() {
@@ -189,7 +191,7 @@ void AudioPipeline::decode_task_(void *params) {
 
     {
       AudioDecoder decoder = AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(),
-                                          this_pipeline->decoded_ring_buffer_.get(), BUFFER_SIZE_BYTES);
+                                          this_pipeline->decoded_ring_buffer_.get(), HTTP_BUFFER_SIZE);//BUFFER_SIZE_BYTES);
       decoder.start(this_pipeline->current_media_file_type_);
 
       bool has_stream_info = false;
@@ -224,7 +226,7 @@ void AudioPipeline::decode_task_(void *params) {
         }
 
         // Block to give other tasks opportunity to run
-        delay(10);
+        delay(15);
       }
     }
   }
@@ -257,7 +259,7 @@ void AudioPipeline::resample_task_(void *params) {
       AudioResampler resampler =
           AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES);
 
-      resampler.start(this_pipeline->current_stream_info_);
+      resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_);
 
       while (true) {
         event_bits = xEventGroupGetBits(this_pipeline->event_group_);
diff --git a/esphome/components/nabu/audio_pipeline.h b/esphome/components/nabu/audio_pipeline.h
index 4b1996f..c11dc05 100644
--- a/esphome/components/nabu/audio_pipeline.h
+++ b/esphome/components/nabu/audio_pipeline.h
@@ -40,8 +40,8 @@ class AudioPipeline {
  public:
   AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type);
 
-  void start(const std::string &uri, const std::string &task_name, UBaseType_t priority = 1);
-  void start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority = 1);
+  void start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1);
+  void start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1);
 
   void stop();
 
@@ -50,7 +50,9 @@ class AudioPipeline {
   void reset_ring_buffers();
 
  protected:
-  void common_start_(const std::string &task_name, UBaseType_t priority);
+  void common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority);
+
+  uint32_t target_sample_rate_;
 
   AudioMixer *mixer_;
 
diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index 836da69..f8ea717 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -48,7 +48,7 @@ AudioResampler::~AudioResampler() {
   }
 }
 
-void AudioResampler::start(media_player::StreamInfo &stream_info) {
+void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) {
   this->stream_info_ = stream_info;
 
   this->input_buffer_current_ = this->input_buffer_;
@@ -67,8 +67,8 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {
     this->channel_factor_ = 2 / stream_info.channels;
     printf("Converting %d channels to 2 channels\n", stream_info.channels);
   }
-  constexpr float resample_rate = 16000.0f;
-  if (stream_info.sample_rate != 16000) {
+
+  if (stream_info.sample_rate != target_sample_rate) {
     // if (stream_info.sample_rate == 48000) {
     //   // Special case, we can do this a lot faster with esp-dsp code!
     //   const uint8_t decimation = 48000 / 16000;
@@ -88,9 +88,9 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {
 
       this->needs_resampling_ = true;
 
-      this->sample_ratio_ = resample_rate / static_cast<float>(stream_info.sample_rate);
+      this->sample_ratio_ = static_cast<float>(target_sample_rate) / static_cast<float>(stream_info.sample_rate);
 
-      printf("Resampling from %d Hz to 16000 Hz\n", stream_info.sample_rate);
+      printf("Resampling from %d Hz to %d Hz\n", stream_info.sample_rate, target_sample_rate);
 
       if (this->sample_ratio_ < 1.0) {
         this->lowpass_ratio_ -= (10.24 / 16);
diff --git a/esphome/components/nabu/audio_resampler.h b/esphome/components/nabu/audio_resampler.h
index 1176c1a..176a094 100644
--- a/esphome/components/nabu/audio_resampler.h
+++ b/esphome/components/nabu/audio_resampler.h
@@ -30,7 +30,7 @@ class AudioResampler {
                  size_t internal_buffer_samples);
   ~AudioResampler();
 
-  void start(media_player::StreamInfo &stream_info);
+  void start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate);
 
   AudioResamplerState resample(bool stop_gracefully);
 
diff --git a/esphome/components/nabu/media_player.py b/esphome/components/nabu/media_player.py
index fb58299..7d7fe3b 100644
--- a/esphome/components/nabu/media_player.py
+++ b/esphome/components/nabu/media_player.py
@@ -46,6 +46,7 @@
 TYPE_WEB = "web"
 
 CONF_FILES = "files"
+CONF_SAMPLE_RATE = "sample_rate"
 
 nabu_ns = cg.esphome_ns.namespace("nabu")
 NabuMediaPlayer = nabu_ns.class_("NabuMediaPlayer")
@@ -176,6 +177,7 @@ def _file_schema(value):
         cv.GenerateID(): cv.declare_id(NabuMediaPlayer),
         cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
         cv.Required(CONF_I2S_DOUT_PIN): pins.internal_gpio_output_pin_number,
+        cv.Optional(CONF_SAMPLE_RATE, default=16000): cv.int_range(min=1),
         cv.Optional(CONF_BITS_PER_SAMPLE, default="16bit"): cv.All(
             _validate_bits, cv.enum(BITS_PER_SAMPLE)
         ),
@@ -203,6 +205,7 @@ async def to_code(config):
     await cg.register_parented(var, config[CONF_I2S_AUDIO_ID])
     cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN]))
     cg.add(var.set_bits_per_sample(config[CONF_BITS_PER_SAMPLE]))
+    cg.add(var.set_sample_rate(config[CONF_SAMPLE_RATE]))
 
     if files_list := config.get(CONF_FILES):
         media_files = []
diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index f817d62..5995813 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -23,7 +23,6 @@ namespace nabu {
 //    - Create a registry of media files in Python
 //    - Add a yaml action to play a specific media file
 
-static const size_t SAMPLE_RATE_HZ = 16000;  // 16 kHz
 static const size_t QUEUE_COUNT = 20;
 static const size_t DMA_BUFFER_COUNT = 4;
 static const size_t DMA_BUFFER_SIZE = 512;
@@ -148,7 +147,7 @@ static void stats_task(void *arg) {
 static const char *const TAG = "nabu_media_player";
 
 void NabuMediaPlayer::setup() {
-  // xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY);
+  xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY);
 
   state = media_player::MEDIA_PLAYER_STATE_IDLE;
 
@@ -212,7 +211,7 @@ void NabuMediaPlayer::speaker_task(void *params) {
 
   i2s_driver_config_t config = {
       .mode = (i2s_mode_t) (this_speaker->parent_->get_i2s_mode() | I2S_MODE_TX),
-      .sample_rate = 16000,
+      .sample_rate = this_speaker->sample_rate_,
       .bits_per_sample = this_speaker->bits_per_sample_,
       .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
       .communication_format = I2S_COMM_FORMAT_STAND_I2S,
@@ -347,13 +346,13 @@ void NabuMediaPlayer::watch_media_commands_() {
               make_unique<AudioPipeline>(this->audio_mixer_.get(), AudioPipelineType::ANNOUNCEMENT);
         }
 
-        this->announcement_pipeline_->start(this->announcement_url_.value(), "ann", 7);
+        this->announcement_pipeline_->start(this->announcement_url_.value(), this->sample_rate_, "ann", 7);
       } else {
         if (this->media_pipeline_ == nullptr) {
           this->media_pipeline_ = make_unique<AudioPipeline>(this->audio_mixer_.get(), AudioPipelineType::MEDIA);
         }
 
-        this->media_pipeline_->start(this->media_url_.value(), "media", 2);
+        this->media_pipeline_->start(this->media_url_.value(), this->sample_rate_, "media", 2);
 
         if (this->is_paused_) {
           CommandEvent command_event;
@@ -371,13 +370,13 @@ void NabuMediaPlayer::watch_media_commands_() {
               make_unique<AudioPipeline>(this->audio_mixer_.get(), AudioPipelineType::ANNOUNCEMENT);
         }
 
-        this->announcement_pipeline_->start(this->announcement_file_.value(), "ann", 7);
+        this->announcement_pipeline_->start(this->announcement_file_.value(), this->sample_rate_, "ann", 7);
       } else {
         if (this->media_pipeline_ == nullptr) {
           this->media_pipeline_ = make_unique<AudioPipeline>(this->audio_mixer_.get(), AudioPipelineType::MEDIA);
         }
 
-        this->media_pipeline_->start(this->media_file_.value(), "media", 2);
+        this->media_pipeline_->start(this->media_file_.value(), this->sample_rate_, "media", 5);
 
         if (this->is_paused_) {
           CommandEvent command_event;
diff --git a/esphome/components/nabu/nabu_media_player.h b/esphome/components/nabu/nabu_media_player.h
index 4ff4260..28a5530 100644
--- a/esphome/components/nabu/nabu_media_player.h
+++ b/esphome/components/nabu/nabu_media_player.h
@@ -57,6 +57,7 @@ class NabuMediaPlayer : public Component,
 
   void set_dout_pin(uint8_t pin) { this->dout_pin_ = pin; }
   void set_bits_per_sample(i2s_bits_per_sample_t bits_per_sample) { this->bits_per_sample_ = bits_per_sample; }
+  void set_sample_rate(uint32_t sample_rate) { this->sample_rate_ = sample_rate; }
 
  protected:
   // Receives commands from HA or from the voice assistant component
@@ -101,6 +102,7 @@ class NabuMediaPlayer : public Component,
   QueueHandle_t speaker_command_queue_;
 
   i2s_bits_per_sample_t bits_per_sample_;
+  uint32_t sample_rate_;
   uint8_t dout_pin_{0};
 
   bool is_paused_{false};

From 5d51d2574fd01d982f77a7aa46384d53c3551ab5 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 13:24:30 -0400
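Subject: [PATCH 05/19] fix flac sync errors after running out of data

FLACDecoder::decode_frame() now clears out_of_data_ on entry and saves
bit_buffer_ / bit_buffer_length_ before reading the sync code. Both are
restored whenever it returns FLAC_DECODER_ERROR_OUT_OF_DATA, including
a new check for fewer than 2 bytes left before the frame footer.
read_header() resets the bit buffer on entry.

AudioDecoder::decode() computes bytes_to_read before shifting the input
buffer and returns DECODING early when the previous attempt was
potentially failed and there are no new bytes to read.
decode_flac_() returns MORE_TO_PROCESS again after reading the header.

HTTP_BUFFER_SIZE is reduced from 128 KiB to 64 KiB and the decode task
delay() goes back from 15 to 10.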
---
 esphome/components/nabu/audio_decoder.cpp  | 17 ++++++++++-------
 esphome/components/nabu/audio_pipeline.cpp |  4 ++--
 esphome/components/nabu/flac_decoder.cpp   | 15 +++++++++++++++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp
index d844c54..9a16d50 100644
--- a/esphome/components/nabu/audio_decoder.cpp
+++ b/esphome/components/nabu/audio_decoder.cpp
@@ -110,16 +110,21 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
       }
     } else {
       // Try to decode more data
+      size_t bytes_available = this->input_ring_buffer_->available();
+      size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);
+
+      if ((this->potentially_failed_count_ > 0) && (bytes_to_read == 0)) {
+        // We didn't have enough data last time, and we have no new data, so just return
+        return AudioDecoderState::DECODING;
+      }
 
       // Shift unread data in input buffer to start
       if (this->input_buffer_length_ > 0) {
         memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_);
+        this->input_buffer_current_ = this->input_buffer_;
       }
-      this->input_buffer_current_ = this->input_buffer_;
 
       // read in new ring buffer data to fill the remaining input buffer
-      size_t bytes_available = this->input_ring_buffer_->available();
-      size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);
       size_t bytes_read = 0;
 
       if (bytes_to_read > 0) {
@@ -303,7 +308,7 @@ FileDecoderState AudioDecoder::decode_flac_() {
       return FileDecoderState::FAILED;
     }
 
-    return FileDecoderState::IDLE;
+    return FileDecoderState::MORE_TO_PROCESS;
   }
 
   uint32_t output_samples = 0;
@@ -311,9 +316,7 @@ FileDecoderState AudioDecoder::decode_flac_() {
       this->flac_decoder_->decode_frame(this->input_buffer_length_, (int16_t *) this->output_buffer_, &output_samples);
 
   if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
-    // not an issue, just needs more data!
-    // TODO: How does this never happen? This may be related to not being able to find the sync word when using shorter
-    // input buffers
+    // Not an issue, just needs more data that we'll get next time.
     return FileDecoderState::POTENTIALLY_FAILED;
   } else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
     // Serious error, can't recover
diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index 553739a..a30e3e2 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -9,7 +9,7 @@ namespace nabu {
 
 static const size_t QUEUE_COUNT = 10;
 
-static const size_t HTTP_BUFFER_SIZE = 128 * 1024;
+static const size_t HTTP_BUFFER_SIZE = 64 * 1024;
 static const size_t BUFFER_SIZE_SAMPLES = 32768;
 static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);
 
@@ -226,7 +226,7 @@ void AudioPipeline::decode_task_(void *params) {
         }
 
         // Block to give other tasks opportunity to run
-        delay(15);
+        delay(10);
       }
     }
   }
diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp
index 460876f..a17440a 100644
--- a/esphome/components/nabu/flac_decoder.cpp
+++ b/esphome/components/nabu/flac_decoder.cpp
@@ -13,6 +13,8 @@ namespace flac {
 FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) {
   this->buffer_index_ = 0;
   this->bytes_left_ = buffer_length;
+  this->bit_buffer_ = 0;
+  this->bit_buffer_length_ = 0;
 
   this->out_of_data_ = (buffer_length == 0);
 
@@ -74,6 +76,8 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) {
 FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *output_buffer, uint32_t *num_samples) {
   this->buffer_index_ = 0;
   this->bytes_left_ = buffer_length;
+  this->out_of_data_ = false;
+
   *num_samples = 0;
 
   if (!this->block_samples_) {
@@ -87,6 +91,9 @@ FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *outpu
     return FLAC_DECODER_NO_MORE_FRAMES;
   }
 
+  uint64_t previous_bit_buffer = this->bit_buffer_;
+  uint32_t previous_bit_buffer_length = this->bit_buffer_length_;
+
   // sync code
   if (this->read_uint(14) != 0x3FFE) {
     return FLAC_DECODER_ERROR_SYNC_NOT_FOUND;
@@ -108,6 +115,8 @@ FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *outpu
     next_int = (next_int << 1) & 0xFF;
 
     if (this->out_of_data_) {
+      this->bit_buffer_ = previous_bit_buffer;
+      this->bit_buffer_length_ = previous_bit_buffer_length;
       return FLAC_DECODER_ERROR_OUT_OF_DATA;
     }
   }
@@ -140,6 +149,12 @@ FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *outpu
   this->decode_subframes(block_size, this->sample_depth_, channel_assignment);
   *num_samples = block_size * this->num_channels_;
 
+  if (this->bytes_left_ < 2) {
+    this->bit_buffer_ = previous_bit_buffer;
+    this->bit_buffer_length_ = previous_bit_buffer_length;
+    return FLAC_DECODER_ERROR_OUT_OF_DATA;
+  }
+
   // Footer
   this->align_to_byte();
   this->read_uint(16);

From ea2b1878a20a63888d7a80c1f3999358f2dc16e3 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 13:24:38 -0400
Subject: [PATCH 06/19] limit http transfer size per loop

---
 esphome/components/nabu/audio_reader.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp
index 915a02f..ea5725a 100644
--- a/esphome/components/nabu/audio_reader.cpp
+++ b/esphome/components/nabu/audio_reader.cpp
@@ -7,6 +7,8 @@
 namespace esphome {
 namespace nabu {
 
+  static const size_t MAX_HTTP_BYTES_TO_READ_PER_ITERATION = 4096;
+
 AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) {
   this->output_ring_buffer_ = output_ring_buffer;
 
@@ -101,6 +103,7 @@ AudioReaderState AudioReader::read() {
 AudioReaderState AudioReader::file_read_() {
   if (this->media_file_bytes_left_ > 0) {
     size_t bytes_to_write = std::min(this->media_file_bytes_left_, this->output_ring_buffer_->free());
+    bytes_to_write = std::min(bytes_to_write, MAX_HTTP_BYTES_TO_READ_PER_ITERATION);
 
     if (bytes_to_write == 0) {
       return AudioReaderState::READING;

From 830f56caf8eb532ffcb7aaddc5e8a74cf49296a4 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 13:25:53 -0400
Subject: [PATCH 07/19] disable task debug logging

---
 esphome/components/nabu/nabu_media_player.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 5995813..616777e 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -147,7 +147,7 @@ static void stats_task(void *arg) {
 static const char *const TAG = "nabu_media_player";
 
 void NabuMediaPlayer::setup() {
-  xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY);
+  // xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY);
 
   state = media_player::MEDIA_PLAYER_STATE_IDLE;
 

From 67f19a61733c5b050695126bb3fb9647362d53d2 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Fri, 2 Aug 2024 13:56:26 -0400
Subject: [PATCH 08/19] revert min http transfer size

---
 esphome/components/nabu/audio_reader.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp
index ea5725a..915a02f 100644
--- a/esphome/components/nabu/audio_reader.cpp
+++ b/esphome/components/nabu/audio_reader.cpp
@@ -7,8 +7,6 @@
 namespace esphome {
 namespace nabu {
 
-  static const size_t MAX_HTTP_BYTES_TO_READ_PER_ITERATION = 4096;
-
 AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) {
   this->output_ring_buffer_ = output_ring_buffer;
 
@@ -103,7 +101,6 @@ AudioReaderState AudioReader::read() {
 AudioReaderState AudioReader::file_read_() {
   if (this->media_file_bytes_left_ > 0) {
     size_t bytes_to_write = std::min(this->media_file_bytes_left_, this->output_ring_buffer_->free());
-    bytes_to_write = std::min(bytes_to_write, MAX_HTTP_BYTES_TO_READ_PER_ITERATION);
 
     if (bytes_to_write == 0) {
       return AudioReaderState::READING;

From 11cf4ee84b08583d14aeee3df5442f5e3a55060e Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 07:59:07 -0400
Subject: [PATCH 09/19] always reset input_buffer_current

---
 esphome/components/nabu/audio_decoder.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp
index 9a16d50..7684fc8 100644
--- a/esphome/components/nabu/audio_decoder.cpp
+++ b/esphome/components/nabu/audio_decoder.cpp
@@ -121,8 +121,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
       // Shift unread data in input buffer to start
       if (this->input_buffer_length_ > 0) {
         memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_);
-        this->input_buffer_current_ = this->input_buffer_;
       }
+      this->input_buffer_current_ = this->input_buffer_;
 
       // read in new ring buffer data to fill the remaining input buffer
       size_t bytes_read = 0;
@@ -231,11 +231,10 @@ FileDecoderState AudioDecoder::decode_wav_() {
       this->wav_bytes_left_ -= bytes_to_write;
     }
 
-    return FileDecoderState::MORE_TO_PROCESS;
+    return FileDecoderState::IDLE;
   }
 
   return FileDecoderState::END_OF_FILE;
-  // return DecoderState::FINISHED;
 }
 
 FileDecoderState AudioDecoder::decode_mp3_() {

From dddf4dedcca50145f09c47c9e73ec585efb4128d Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 08:08:19 -0400
Subject: [PATCH 10/19] update TODOs and describe media player framework

---
 esphome/components/nabu/nabu_media_player.cpp | 50 ++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 616777e..017a0bf 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -14,14 +14,62 @@ namespace esphome {
 namespace nabu {
 
 // TODO:
+//  - Have better logging outputs
+//    - Output file type and stream information + any resampling processes
+//    - Remove printf
+//    - Log which part of an audio pipeline has an error
+//  - Verify we can handle the audio stream (no more than 2 channels, etc...) and abort if not
+//  - Add I2C getting of mute status on boot
+//  - Clear mixer input buffer if an audio tream is stopped
+//  - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise)
 //  - Tune task memory requirements and potentially buffer sizes if issues appear
-//  - Biquad filters work for downsampling without handling float buffer carefully, upsampling will require some care
 //  - Ducking improvements
 //    - Ducking ratio probably isn't the best way to specify, as volume perception is not linear
 //    - Add a YAML action for setting the ducking level instead of requiring a lambda
 //  - Clean up process around playing back local media files
 //    - Create a registry of media files in Python
+//    - What do I need to give them an ESPHome id?
 //    - Add a yaml action to play a specific media file
+//
+//
+// Framework:
+//  - Media player that can handle two streams; one for media and one for announcements
+//    - If played together, they are mixed with the announcement stream staying at full volume
+//    - The media audio can be further ducked via the ``set_ducking_ratio`` function
+//  - Each stream is handled by an ``AudioPipeline`` object with three parts/tasks
+//    - ``AudioReader`` handles reading from an HTTP source or from a PROGMEM flash set at compile time
+//    - ``AudioDecoder`` handles decoding the audio file. All formats are limited to two channels and 16 bits per sample
+//      - FLAC
+//      - WAV
+//      - MP3 (based on the libhelix decoder - a random mp3 file may be incompatible)
+//    - ``AudioResampler`` handles converting the sample rate to the configured output sample rate and converting mono
+//      to stereo
+//      - The quality is not good, and it is slow! Please send audio at the configured sample rate to avoid these issues
+//    - Each task will always run once started, but they will not do anything until they are needed
+//    - FreeRTOS Event Groups make up the inter-task communication
+//    - The ``AudioPipeline`` sets up an output ring buffer for the Reader and Decoder parts. The next part/task
+//      automatically pulls from the previous ring buffer
+//  - The streams are mixed together in the ``AudioMixer`` task
+//    - Each stream has a corresponding input buffer that the ``AudioResampler`` feeds directly
+//    - Pausing the media stream is done here
+//    - Media stream ducking is done here
+//    - The output ring buffer feeds the ``speaker_task`` directly. It is kept small intentionally to avoid latency when
+//      pausing
+//  - Audio output is handled by the ``speaker_task``. It configures the I2S bus and copies audio from the mixer's
+//    output ring buffer to the DMA buffers
+//  - Media player commands are received by the ``control`` function. The commands are added to the
+//    ``media_control_command_queue_`` to be processed in the component's loop
+//    - Starting a stream initializes the appropriate pipeline or stops it if it is already running
+//    - Volume and mute commands are achieved by the ``mute``, ``unmute``, ``set_volume`` functions. They communicate
+//      directly with the DAC over I2C.
+//      - Volume commands are ignored if the media control queue is full to avoid crashing when the track wheel is spun
+//      fast
+//    - Pausing is sent to the ``AudioMixer`` task. It only affects the media stream.
+//  - The component's main loop performs housekeeping:
+//    - It reads the media control queue and processes it directly
+//    - It watches the state of speaker and mixer tasks
+//    - It determines the overall state of the media player by considering the state of each pipeline
+//      - announcement playback takes highest priority
 
 static const size_t QUEUE_COUNT = 20;
 static const size_t DMA_BUFFER_COUNT = 4;

From 8a2c8bddf2a6a75cfbe779e6fcd5fcc6290e0973 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 08:58:01 -0400
Subject: [PATCH 11/19] read mute status from dac at setup

---
 esphome/components/nabu/nabu_media_player.cpp | 31 +++++++++++++++++--
 esphome/components/nabu/nabu_media_player.h   |  5 ++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 017a0bf..5cf0ed3 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -19,7 +19,6 @@ namespace nabu {
 //    - Remove printf
 //    - Log which part of an audio pipeline has an error
 //  - Verify we can handle the audio stream (no more than 2 channels, etc...) and abort if not
-//  - Add I2C getting of mute status on boot
 //  - Clear mixer input buffer if an audio tream is stopped
 //  - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise)
 //  - Tune task memory requirements and potentially buffer sizes if issues appear
@@ -214,7 +213,10 @@ void NabuMediaPlayer::setup() {
 
   xTaskCreate(NabuMediaPlayer::speaker_task, "speaker_task", 3072, (void *) this, 23, &this->speaker_task_handle_);
 
-  this->get_dac_volume_();
+  if (!this->get_dac_volume_().has_value() || !this->get_dac_mute_().has_value()) {
+    ESP_LOGE(TAG, "Couldn't communicate with DAC");
+    this->mark_failed();
+  }
 
   // if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, 0x01)) {
   //   ESP_LOGE(TAG, "DAC failed to switch register page");
@@ -703,6 +705,31 @@ bool NabuMediaPlayer::set_volume_(float volume, bool publish) {
   return true;
 }
 
+optional<bool> NabuMediaPlayer::get_dac_mute_(bool publish) {
+  if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, DAC_MUTE_PAGE)) {
+    ESP_LOGE(TAG, "DAC failed to switch to mute page registers");
+    return {};
+  }
+
+  uint8_t dac_mute_left = 0;
+  uint8_t dac_mute_right = 0;
+  if (!this->read_byte(DAC_LEFT_MUTE_REGISTER, &dac_mute_left) ||
+      !this->read_byte(DAC_RIGHT_MUTE_REGISTER, &dac_mute_right)) {
+    ESP_LOGE(TAG, "DAC failed to read mute status");
+    return {};
+  }
+
+  bool is_muted = false;
+  if (dac_mute_left == DAC_MUTE_COMMAND && dac_mute_right == DAC_MUTE_COMMAND) {
+    is_muted = true;
+  }
+
+  if (publish) {
+    this->is_muted_ = is_muted;
+  }
+  return is_muted;
+}
+
 bool NabuMediaPlayer::mute_() {
   if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, DAC_MUTE_PAGE)) {
     ESP_LOGE(TAG, "DAC failed to switch to mute page registers");
diff --git a/esphome/components/nabu/nabu_media_player.h b/esphome/components/nabu/nabu_media_player.h
index 28a5530..149ee6b 100644
--- a/esphome/components/nabu/nabu_media_player.h
+++ b/esphome/components/nabu/nabu_media_player.h
@@ -64,9 +64,12 @@ class NabuMediaPlayer : public Component,
   // Sends commands to the media_control_commanda_queue_
   void control(const media_player::MediaPlayerCall &call) override;
 
-  /// @return volume read from DAC between 0.0 and 1.0, if successful
+  /// @return Volume read from DAC between 0.0 and 1.0, if successful. Updates volume_ if publish is true.
   optional<float> get_dac_volume_(bool publish = true);
 
+  /// @return Mute status read from DAC, if successful. Updates is_muted_ if publish is true.
+  optional<bool> get_dac_mute_(bool publish = true);
+
   /// @return true if I2C writes were successful
   bool set_volume_(float volume, bool publish = true);
 

From 75d7462c2bda3b104dcef922f4e455da873d5246 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 09:05:17 -0400
Subject: [PATCH 12/19] raise error if bits per sample is too high for our
 optimized version

---
 esphome/components/nabu/flac_decoder.cpp | 6 ++++++
 esphome/components/nabu/flac_decoder.h   | 1 +
 2 files changed, 7 insertions(+)

diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp
index a17440a..8fe0bea 100644
--- a/esphome/components/nabu/flac_decoder.cpp
+++ b/esphome/components/nabu/flac_decoder.cpp
@@ -69,6 +69,12 @@ FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) {
     return FLAC_DECODER_ERROR_BAD_HEADER;
   }
 
+  if (this->sample_depth_ > 16) {
+    // This decoder can support higher sample depths, but it would require using int32s throughout. We limit to 16 bits
+    // per sample for the sake of speed, as we can then use a quick esp-dsp function for the dot product calculation
+    return FLAC_DECODER_ERROR_UNSUPPORTED_BITS_PER_SAMPLE;
+  }
+
   // Successfully read header
   return FLAC_DECODER_SUCCESS;
 }  // read_header
diff --git a/esphome/components/nabu/flac_decoder.h b/esphome/components/nabu/flac_decoder.h
index ef2ec64..4ec33d6 100644
--- a/esphome/components/nabu/flac_decoder.h
+++ b/esphome/components/nabu/flac_decoder.h
@@ -38,6 +38,7 @@ enum FLACDecoderResult {
   FLAC_DECODER_ERROR_BAD_FIXED_PREDICTION_ORDER = 10,
   FLAC_DECODER_ERROR_RESERVED_RESIDUAL_CODING_METHOD = 11,
   FLAC_DECODER_ERROR_BLOCK_SIZE_NOT_DIVISIBLE_RICE = 12,
+  FLAC_DECODER_ERROR_UNSUPPORTED_BITS_PER_SAMPLE = 13,
 };
 
 // Coefficients for fixed linear prediction

From 47eb168371cace0f73a7b2732fef79bc0ce6ac5f Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 09:20:39 -0400
Subject: [PATCH 13/19] block unprocessable streams

---
 esphome/components/nabu/audio_pipeline.cpp    | 17 ++++++++++++-----
 esphome/components/nabu/audio_resampler.cpp   |  9 ++++++++-
 esphome/components/nabu/audio_resampler.h     |  6 +++++-
 esphome/components/nabu/nabu_media_player.cpp |  3 +--
 4 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index a30e3e2..9c52fc4 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -50,14 +50,16 @@ AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type)
   this->event_group_ = xEventGroupCreate();
 }
 
-void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
+void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name,
+                          UBaseType_t priority) {
   this->common_start_(target_sample_rate, task_name, priority);
 
   this->current_uri_ = uri;
   xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP);
 }
 
-void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
+void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate,
+                          const std::string &task_name, UBaseType_t priority) {
   this->common_start_(target_sample_rate, task_name, priority);
 
   this->current_media_file_ = media_file;
@@ -190,8 +192,9 @@ void AudioPipeline::decode_task_(void *params) {
     xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED);
 
     {
-      AudioDecoder decoder = AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(),
-                                          this_pipeline->decoded_ring_buffer_.get(), HTTP_BUFFER_SIZE);//BUFFER_SIZE_BYTES);
+      AudioDecoder decoder =
+          AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(), this_pipeline->decoded_ring_buffer_.get(),
+                       HTTP_BUFFER_SIZE);  // BUFFER_SIZE_BYTES);
       decoder.start(this_pipeline->current_media_file_type_);
 
       bool has_stream_info = false;
@@ -259,7 +262,11 @@ void AudioPipeline::resample_task_(void *params) {
       AudioResampler resampler =
           AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES);
 
-      resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_);
+      if (!resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_)) {
+        // Unsupported incoming audio stream
+        xEventGroupSetBits(this_pipeline->event_group_,
+                           EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
+      }
 
       while (true) {
         event_bits = xEventGroupGetBits(this_pipeline->event_group_);
diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index f8ea717..1cf2765 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -48,7 +48,7 @@ AudioResampler::~AudioResampler() {
   }
 }
 
-void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) {
+bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) {
   this->stream_info_ = stream_info;
 
   this->input_buffer_current_ = this->input_buffer_;
@@ -63,6 +63,11 @@ void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe
 
   this->needs_mono_to_stereo_ = (stream_info.channels != 2);
 
+  if ((stream_info.channels > 2) || (stream_info_.bits_per_sample != 16)) {
+    // TODO: Make these values configurable
+    return false;
+  }
+
   if (stream_info.channels > 0) {
     this->channel_factor_ = 2 / stream_info.channels;
     printf("Converting %d channels to 2 channels\n", stream_info.channels);
@@ -138,6 +143,8 @@ void AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe
   } else {
     this->needs_resampling_ = false;
   }
+
+  return true;
 }
 
 AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
diff --git a/esphome/components/nabu/audio_resampler.h b/esphome/components/nabu/audio_resampler.h
index 176a094..aa36086 100644
--- a/esphome/components/nabu/audio_resampler.h
+++ b/esphome/components/nabu/audio_resampler.h
@@ -30,7 +30,11 @@ class AudioResampler {
                  size_t internal_buffer_samples);
   ~AudioResampler();
 
-  void start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate);
+  /// @brief Sets up the various bits necessary to resample
+  /// @param stream_info the incoming sample rate, bits per sample, and number of channels
+  /// @param target_sample_rate the necessary sample rate to convert to
+  /// @return True if it can convert the incoming stream, false otherwise
+  bool start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate);
 
   AudioResamplerState resample(bool stop_gracefully);
 
diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 5cf0ed3..ee9fac0 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -18,8 +18,7 @@ namespace nabu {
 //    - Output file type and stream information + any resampling processes
 //    - Remove printf
 //    - Log which part of an audio pipeline has an error
-//  - Verify we can handle the audio stream (no more than 2 channels, etc...) and abort if not
-//  - Clear mixer input buffer if an audio tream is stopped
+//  - Clear mixer input buffer if an audio stream is stopped
 //  - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise)
 //  - Tune task memory requirements and potentially buffer sizes if issues appear
 //  - Ducking improvements

From fc1a5682ca8db809abef9e35cad32e8aac0a997b Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 09:29:51 -0400
Subject: [PATCH 14/19] clear appropriate mixer buffer when stopping a pipeline

---
 esphome/components/nabu/audio_pipeline.cpp    | 9 +++++++++
 esphome/components/nabu/nabu_media_player.cpp | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index 9c52fc4..92deb75 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -110,6 +110,15 @@ void AudioPipeline::stop() {
       true,                                                                               // Wait for all the bits,
       pdMS_TO_TICKS(200));  // Block temporarily before deleting each task
 
+  // Clear the ring buffer in the mixer; avoids playing incorrect audio when starting a new file while paused
+  CommandEvent command_event;
+  if (this->pipeline_type_ == AudioPipelineType::MEDIA) {
+    command_event.command = CommandEventType::CLEAR_MEDIA;
+  } else {
+    command_event.command = CommandEventType::CLEAR_ANNOUNCEMENT;
+  }
+  this->mixer_->send_command(&command_event);
+
   xEventGroupClearBits(this->event_group_, ALL_BITS);
   this->reset_ring_buffers();
 }
diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index ee9fac0..49147db 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -18,7 +18,6 @@ namespace nabu {
 //    - Output file type and stream information + any resampling processes
 //    - Remove printf
 //    - Log which part of an audio pipeline has an error
-//  - Clear mixer input buffer if an audio stream is stopped
 //  - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise)
 //  - Tune task memory requirements and potentially buffer sizes if issues appear
 //  - Ducking improvements

From 2fa940b5a3c233aaf370c397f7ee7edbf96e7eee Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 09:48:16 -0400
Subject: [PATCH 15/19] log which pipeline element has an error

---
 esphome/components/nabu/audio_pipeline.cpp    | 19 +++++++++++-
 esphome/components/nabu/audio_pipeline.h      |  7 ++---
 esphome/components/nabu/nabu_media_player.cpp | 29 +++++++++++++++----
 3 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index 92deb75..e9520b1 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -92,7 +92,24 @@ AudioPipelineState AudioPipeline::get_state() {
   EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
   if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) {
     return AudioPipelineState::STOPPED;
-  } else if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) &&
+  }
+  
+  if ((event_bits & READER_MESSAGE_ERROR)) {
+    xEventGroupClearBits(this->event_group_, READER_MESSAGE_ERROR);
+    return AudioPipelineState::ERROR_READING;
+  }
+  
+  if ((event_bits & DECODER_MESSAGE_ERROR)) {
+    xEventGroupClearBits(this->event_group_, DECODER_MESSAGE_ERROR);
+    return AudioPipelineState::ERROR_DECODING;
+  }
+
+  if ((event_bits & RESAMPLER_MESSAGE_ERROR)) {
+    xEventGroupClearBits(this->event_group_, RESAMPLER_MESSAGE_ERROR);
+    return AudioPipelineState::ERROR_RESAMPLING;
+  }
+
+  if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) &&
              (event_bits & RESAMPLER_MESSAGE_FINISHED)) {
     return AudioPipelineState::STOPPED;
   }
diff --git a/esphome/components/nabu/audio_pipeline.h b/esphome/components/nabu/audio_pipeline.h
index c11dc05..20a8d91 100644
--- a/esphome/components/nabu/audio_pipeline.h
+++ b/esphome/components/nabu/audio_pipeline.h
@@ -28,12 +28,11 @@ enum class AudioPipelineType : uint8_t {
 };
 
 enum class AudioPipelineState : uint8_t {
-  STARTING,
-  STARTED,
   PLAYING,
-  PAUSED,
-  STOPPING,
   STOPPED,
+  ERROR_READING,
+  ERROR_DECODING,
+  ERROR_RESAMPLING,
 };
 
 class AudioPipeline {
diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 49147db..28bdec9 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -17,7 +17,6 @@ namespace nabu {
 //  - Have better logging outputs
 //    - Output file type and stream information + any resampling processes
 //    - Remove printf
-//    - Log which part of an audio pipeline has an error
 //  - Block media commands until the bluetooth stack is disabled (will run out of memory otherwise)
 //  - Tune task memory requirements and potentially buffer sizes if issues appear
 //  - Ducking improvements
@@ -452,7 +451,7 @@ void NabuMediaPlayer::watch_media_commands_() {
           this->is_paused_ = false;
           break;
         case media_player::MEDIA_PLAYER_COMMAND_PAUSE:
-          if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) {
+          if (!this->is_paused_) {
             command_event.command = CommandEventType::PAUSE_MEDIA;
             this->audio_mixer_->send_command(&command_event);
           }
@@ -557,12 +556,31 @@ void NabuMediaPlayer::loop() {
 
   if (this->announcement_pipeline_ != nullptr)
     this->announcement_pipeline_state_ = this->announcement_pipeline_->get_state();
+  
+  if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_READING) {
+    ESP_LOGE(TAG, "Encountered an error reading the announcement file");
+  }
+  if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
+    ESP_LOGE(TAG, "Encountered an error decoding the announcement file");
+  }
+  if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) {
+    ESP_LOGE(TAG, "Encountered an error resampling the announcement file");
+  }
 
   if (this->media_pipeline_ != nullptr)
     this->media_pipeline_state_ = this->media_pipeline_->get_state();
 
-  if ((this->announcement_pipeline_state_ != AudioPipelineState::STOPPING) &&
-      (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED)) {
+  if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) {
+    ESP_LOGE(TAG, "Encountered an error reading the media file");
+  }
+  if (this->media_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
+    ESP_LOGE(TAG, "Encountered an error decoding the media file");
+  }
+  if (this->media_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) {
+    ESP_LOGE(TAG, "Encountered an error resampling the media file");
+  }
+
+  if (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED) {
     this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING;
     if (this->is_idle_muted_ && !this->is_muted_) {
       // this->unmute_();
@@ -576,8 +594,7 @@ void NabuMediaPlayer::loop() {
         // this->mute_();
         this->is_idle_muted_ = true;
       }
-    } else if ((this->media_pipeline_state_ == AudioPipelineState::STOPPING) ||
-               (this->media_pipeline_state_ == AudioPipelineState::STOPPED)) {
+    } else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) {
       this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
       if (!this->is_idle_muted_) {
         // this->mute_();

From cb2b5ab60f85cb3cdde8525617b051e9bc37f20d Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 10:07:56 -0400
Subject: [PATCH 16/19] never transfer more input samples than can be processed
 in 1 step

---
 esphome/components/nabu/audio_resampler.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index 1cf2765..810a409 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -173,6 +173,17 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
   // Refill input buffer
   //////
 
+  // Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input samples to load
+  // Mono to stereo -> cut in half
+  // Upsampling -> reduce by a factor of the ceiling of sample_ratio_
+
+  size_t max_input_samples = this->internal_buffer_samples_;
+
+  max_input_samples /= this->stream_info_.channels;
+  
+  uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
+  max_input_samples /= upsampling_factor;
+
   // Move old data to the start of the buffer
   if (this->input_buffer_length_ > 0) {
     memmove((void *) this->input_buffer_, (void *) this->input_buffer_current_, this->input_buffer_length_);
@@ -182,7 +193,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
   // Copy new data to the end of the of the buffer
   size_t bytes_available = this->input_ring_buffer_->available();
   size_t bytes_to_read =
-      std::min(bytes_available, this->internal_buffer_samples_ * sizeof(int16_t) - this->input_buffer_length_);
+      std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_);
 
   if (bytes_to_read > 0) {
     int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t);
@@ -252,15 +263,12 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
 
         size_t samples_read = this->input_buffer_length_ / sizeof(int16_t);
 
-        // This is inefficient! It reconverts any samples that weren't used in the previous resampling run
         for (int i = 0; i < samples_read; ++i) {
           this->float_input_buffer_[i] = static_cast<float>(this->input_buffer_[i]) / 32768.0f;
         }
 
         size_t frames_read = samples_read / this->stream_info_.channels;
 
-        // The low pass filter seems to be causing glitches... probably because samples are repeated due to the above
-        // ineffeciency!
         if (this->pre_filter_) {
           for (int i = 0; i < this->stream_info_.channels; ++i) {
             biquad_apply_buffer(&this->lowpass_[i][0], this->float_input_buffer_ + i, frames_read,

From 7e5869d04a7db9fef73fa3c77fda12cfb46f3481 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 11:46:34 -0400
Subject: [PATCH 17/19] correctly account for mono to stereo adjustment

---
 esphome/components/nabu/audio_resampler.cpp | 23 +++++++++++----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index 810a409..0ffba43 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -79,7 +79,8 @@ bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe
     //   const uint8_t decimation = 48000 / 16000;
     //   const float fir_out_offset = 0;  //((FIR_FILTER_LENGTH / decimation / 2) - 1);
 
-    //   int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH,
+    //   int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t)
+    //   FIR_FILTER_LENGTH,
     //                                                       (float) 0.5 / decimation);
     //   // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
     //   dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
@@ -87,7 +88,7 @@ bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t targe
     //   this->decimation_filter_ = true;
     //   this->needs_resampling_ = true;
     //   // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
-    // } else 
+    // } else
     {
       int flags = 0;
 
@@ -173,14 +174,14 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
   // Refill input buffer
   //////
 
-  // Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input samples to load
-  // Mono to stereo -> cut in half
-  // Upsampling -> reduce by a factor of the ceiling of sample_ratio_
-
+  // Depending on whether we are converting mono to stereo or upsampling, we may need to restrict how many input
+  // samples we transfer
   size_t max_input_samples = this->internal_buffer_samples_;
 
-  max_input_samples /= this->stream_info_.channels;
-  
+  // Mono to stereo -> cut in half
+  max_input_samples /= (2 / this->stream_info_.channels);
+
+  // Upsampling -> reduce by a factor of the ceiling of sample_ratio_
   uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
   max_input_samples /= upsampling_factor;
 
@@ -192,8 +193,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
 
   // Copy new data to the end of the of the buffer
   size_t bytes_available = this->input_ring_buffer_->available();
-  size_t bytes_to_read =
-      std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_);
+  size_t bytes_to_read = std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_);
 
   if (bytes_to_read > 0) {
     int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t);
@@ -226,7 +226,8 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
         }
       } else {
         // Interleaved stereo samples
-        // TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't mixed
+        // TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't
+        // mixed
         size_t available_samples = this->input_buffer_length_ / sizeof(int16_t);
         for (int i = 0; i < available_samples / 2; ++i) {
           // split interleaved samples into two separate streams

From ed74ac31582cadaa38185c4aabac4a5ad723b177 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Sun, 4 Aug 2024 15:40:03 -0400
Subject: [PATCH 18/19] check for upsampling before scaling max input samples

---
 esphome/components/nabu/audio_resampler.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index 0ffba43..ce740a4 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -181,9 +181,11 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
   // Mono to stereo -> cut in half
   max_input_samples /= (2 / this->stream_info_.channels);
 
-  // Upsampling -> reduce by a factor of the ceiling of sample_ratio_
-  uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
-  max_input_samples /= upsampling_factor;
+  if (this->sample_ratio_ > 1.0) {
+    // Upsampling -> reduce by a factor of the ceiling of sample_ratio_
+    uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
+    max_input_samples /= upsampling_factor;
+  }
 
   // Move old data to the start of the buffer
   if (this->input_buffer_length_ > 0) {

From 9aefd4d4753c8135858a437d7030f1da8af9be03 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Mon, 5 Aug 2024 09:32:03 -0400
Subject: [PATCH 19/19] fix memory leak

---
 esphome/components/nabu/audio_resampler.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index ce740a4..0203cc6 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -46,6 +46,8 @@ AudioResampler::~AudioResampler() {
     resampleFree(this->resampler_);
     this->resampler_ = nullptr;
   }
+
+  // dsps_fird_s16_aexx_free(&this->fir_filter_);
 }
 
 bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) {
@@ -382,6 +384,7 @@ int8_t AudioResampler::generate_q15_fir_coefficients_(int16_t *fir_coeffs, const
   }
 
   free(fir_window);
+  free(float_coeffs);
 
   return shift;
 }