From 5488cf07d18c9f16014a4e0b0b3dbe9adb607a87 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Thu, 1 Aug 2024 13:26:12 -0400 Subject: [PATCH 1/2] Simplify media player and better manage memory (#28) * use psram for spectrogram generation buffers * refactor media player * IDF 5+ fix * reimplement volume fixes --- .../components/media_player/media_player.h | 10 + .../components/micro_wake_word/__init__.py | 3 +- esphome/components/nabu/audio_decoder.cpp | 339 +++++++++++++++ esphome/components/nabu/audio_decoder.h | 79 ++++ .../{combine_streamer.cpp => audio_mixer.cpp} | 72 ++-- .../nabu/{streamer.h => audio_mixer.h} | 90 ++-- esphome/components/nabu/audio_pipeline.cpp | 288 +++++++++++++ esphome/components/nabu/audio_pipeline.h | 90 ++++ esphome/components/nabu/audio_reader.cpp | 152 +++++++ esphome/components/nabu/audio_reader.h | 49 +++ esphome/components/nabu/audio_resampler.cpp | 373 ++++++++++++++++ esphome/components/nabu/audio_resampler.h | 89 ++++ esphome/components/nabu/biquad.h | 1 + esphome/components/nabu/combine_streamer.h | 50 --- esphome/components/nabu/decode_streamer.cpp | 399 ------------------ esphome/components/nabu/decode_streamer.h | 37 -- esphome/components/nabu/flac_decoder.cpp | 83 ++-- esphome/components/nabu/flac_decoder.h | 80 ++-- esphome/components/nabu/media_player.py | 3 +- esphome/components/nabu/nabu_media_player.cpp | 251 +++-------- esphome/components/nabu/nabu_media_player.h | 35 +- esphome/components/nabu/pipeline.cpp | 287 ------------- esphome/components/nabu/pipeline.h | 64 --- esphome/components/nabu/resample_streamer.cpp | 354 ---------------- esphome/components/nabu/resample_streamer.h | 36 -- esphome/components/nabu/streamer.cpp | 286 ------------- esphome/components/nabu/wav_decoder.cpp | 20 +- esphome/components/nabu/wav_decoder.h | 4 +- voice-kit.yaml | 1 + 29 files changed, 1692 insertions(+), 1933 deletions(-) create mode 100644 esphome/components/nabu/audio_decoder.cpp create mode 100644 
esphome/components/nabu/audio_decoder.h rename esphome/components/nabu/{combine_streamer.cpp => audio_mixer.cpp} (73%) rename esphome/components/nabu/{streamer.h => audio_mixer.h} (54%) create mode 100644 esphome/components/nabu/audio_pipeline.cpp create mode 100644 esphome/components/nabu/audio_pipeline.h create mode 100644 esphome/components/nabu/audio_reader.cpp create mode 100644 esphome/components/nabu/audio_reader.h create mode 100644 esphome/components/nabu/audio_resampler.cpp create mode 100644 esphome/components/nabu/audio_resampler.h delete mode 100644 esphome/components/nabu/combine_streamer.h delete mode 100644 esphome/components/nabu/decode_streamer.cpp delete mode 100644 esphome/components/nabu/decode_streamer.h delete mode 100644 esphome/components/nabu/pipeline.cpp delete mode 100644 esphome/components/nabu/pipeline.h delete mode 100644 esphome/components/nabu/resample_streamer.cpp delete mode 100644 esphome/components/nabu/resample_streamer.h delete mode 100644 esphome/components/nabu/streamer.cpp diff --git a/esphome/components/media_player/media_player.h b/esphome/components/media_player/media_player.h index f3d765f..761523e 100644 --- a/esphome/components/media_player/media_player.h +++ b/esphome/components/media_player/media_player.h @@ -6,6 +6,16 @@ namespace esphome { namespace media_player { +struct StreamInfo { + bool operator==(const StreamInfo &rhs) const { + return (channels == rhs.channels) && (bits_per_sample == rhs.bits_per_sample) && (sample_rate == rhs.sample_rate); + } + bool operator!=(const StreamInfo &rhs) const { return !operator==(rhs); } + uint8_t channels = 1; + uint8_t bits_per_sample = 16; + uint32_t sample_rate = 16000; +}; + enum MediaPlayerState : uint8_t { MEDIA_PLAYER_STATE_NONE = 0, MEDIA_PLAYER_STATE_IDLE = 1, diff --git a/esphome/components/micro_wake_word/__init__.py b/esphome/components/micro_wake_word/__init__.py index c2faca2..a95d65a 100644 --- a/esphome/components/micro_wake_word/__init__.py +++ 
b/esphome/components/micro_wake_word/__init__.py @@ -502,7 +502,8 @@ async def to_code(config): ) cg.add(var.set_features_step_size(manifest[KEY_MICRO][CONF_FEATURE_STEP_SIZE])) - cg.add_library("kahrendt/ESPMicroSpeechFeatures", "1.0.0") + cg.add_library(None,None,"https://github.com/kahrendt/ESPMicroSpeechFeatures.git#psram-allocations") + # cg.add_library("kahrendt/ESPMicroSpeechFeatures", "1.0.0") MICRO_WAKE_WORD_ACTION_SCHEMA = cv.Schema({cv.GenerateID(): cv.use_id(MicroWakeWord)}) diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp new file mode 100644 index 0000000..11720a5 --- /dev/null +++ b/esphome/components/nabu/audio_decoder.cpp @@ -0,0 +1,339 @@ +#ifdef USE_ESP_IDF + +#include "audio_decoder.h" + +#include "mp3_decoder.h" + +#include "esphome/core/ring_buffer.h" + +namespace esphome { +namespace nabu { + +AudioDecoder::AudioDecoder(RingBuffer *input_ring_buffer, RingBuffer *output_ring_buffer, size_t internal_buffer_size) { + this->input_ring_buffer_ = input_ring_buffer; + this->output_ring_buffer_ = output_ring_buffer; + this->internal_buffer_size_ = internal_buffer_size; + + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + this->input_buffer_ = allocator.allocate(internal_buffer_size); + this->output_buffer_ = allocator.allocate(internal_buffer_size); +} + +AudioDecoder::~AudioDecoder() { + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + if (this->input_buffer_ != nullptr) { + allocator.deallocate(this->input_buffer_, this->internal_buffer_size_); + } + if (this->output_buffer_ != nullptr) { + allocator.deallocate(this->output_buffer_, this->internal_buffer_size_); + } + + if (this->flac_decoder_ != nullptr) { + this->flac_decoder_->free_buffers(); + delete this->flac_decoder_; + this->flac_decoder_ = nullptr; + } + + if (this->wav_decoder_ != nullptr) { + delete this->wav_decoder_; + this->wav_decoder_ = nullptr; + } + + if (this->media_file_type_ == 
media_player::MediaFileType::MP3) { + MP3FreeDecoder(this->mp3_decoder_); + } +} + +void AudioDecoder::start(media_player::MediaFileType media_file_type) { + this->media_file_type_ = media_file_type; + + this->input_buffer_current_ = this->input_buffer_; + this->input_buffer_length_ = 0; + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ = 0; + + this->potentially_failed_count_ = 0; + this->end_of_file_ = false; + + switch (this->media_file_type_) { + case media_player::MediaFileType::WAV: + this->wav_decoder_ = new wav_decoder::WAVDecoder(&this->input_buffer_current_); + this->wav_decoder_->reset(); + break; + case media_player::MediaFileType::MP3: + this->mp3_decoder_ = MP3InitDecoder(); + break; + case media_player::MediaFileType::FLAC: + this->flac_decoder_ = new flac::FLACDecoder(this->input_buffer_); + break; + case media_player::MediaFileType::NONE: + break; + } +} + +AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { + if (stop_gracefully) { + if (this->output_buffer_length_ == 0) { + // If the file decoder believes it the end of file + if (this->end_of_file_) { + return AudioDecoderState::FINISHED; + } + // If all the internal buffers are empty, the decoding is done + if ((this->input_ring_buffer_->available() == 0) && (this->input_buffer_length_ == 0)) { + return AudioDecoderState::FINISHED; + } + } + } + + if (this->potentially_failed_count_ > 5) { + return AudioDecoderState::FAILED; + } + + FileDecoderState state = FileDecoderState::MORE_TO_PROCESS; + + while (state == FileDecoderState::MORE_TO_PROCESS) { + if (this->output_buffer_length_ > 0) { + // Have decoded data, feed into output ring buffer + size_t bytes_free = this->output_ring_buffer_->free(); + size_t bytes_to_write = std::min(this->output_buffer_length_, bytes_free); + + if (bytes_to_write > 0) { + size_t bytes_written = this->output_ring_buffer_->write((void *) this->output_buffer_current_, bytes_to_write); + + this->output_buffer_length_ -= 
bytes_written; + this->output_buffer_current_ += bytes_written; + } + + if (this->output_buffer_length_ > 0) { + // Output ring buffer is full, so we can't do any more processing + return AudioDecoderState::DECODING; + } + } else { + // Try to decode more data + + // Shift unread data in input buffer to start + if ((this->input_buffer_length_ > 0) && (this->input_buffer_length_ < this->internal_buffer_size_)) { + memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_); + } + this->input_buffer_current_ = this->input_buffer_; + + // read in new ring buffer data to fill the remaining input buffer + size_t bytes_available = this->input_ring_buffer_->available(); + size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_); + size_t bytes_read = 0; + + if (bytes_to_read > 0) { + uint8_t *new_mp3_data = this->input_buffer_ + this->input_buffer_length_; + bytes_read = this->input_ring_buffer_->read((void *) new_mp3_data, bytes_to_read); + + this->input_buffer_length_ += bytes_read; + } + + if (this->input_buffer_length_ == 0) { + // No input data available, so we can't do any more processing + state = FileDecoderState::IDLE; + } else { + switch (this->media_file_type_) { + case media_player::MediaFileType::WAV: + state = this->decode_wav_(); + break; + case media_player::MediaFileType::MP3: + state = this->decode_mp3_(); + break; + case media_player::MediaFileType::FLAC: + state = this->decode_flac_(); + break; + case media_player::MediaFileType::NONE: + state = FileDecoderState::IDLE; + break; + } + } + } + if (state == FileDecoderState::POTENTIALLY_FAILED) { + ++this->potentially_failed_count_; + } else if (state == FileDecoderState::END_OF_FILE) { + this->end_of_file_ = true; + } else { + this->potentially_failed_count_ = 0; + } + } + return AudioDecoderState::DECODING; +} + +FileDecoderState AudioDecoder::decode_wav_() { + if (!this->channels_.has_value() && (this->input_buffer_length_ > 
44)) { + // Header hasn't been processed + + size_t original_buffer_length = this->input_buffer_length_; + + size_t wav_bytes_to_skip = this->wav_decoder_->bytes_to_skip(); + size_t wav_bytes_to_read = this->wav_decoder_->bytes_needed(); + + bool header_finished = false; + while (!header_finished) { + if (wav_bytes_to_skip > 0) { + // Adjust pointer to skip the appropriate bytes + this->input_buffer_current_ += wav_bytes_to_skip; + this->input_buffer_length_ -= wav_bytes_to_skip; + wav_bytes_to_skip = 0; + } else if (wav_bytes_to_read > 0) { + wav_decoder::WAVDecoderResult result = this->wav_decoder_->next(); + this->input_buffer_current_ += wav_bytes_to_read; + this->input_buffer_length_ -= wav_bytes_to_read; + + if (result == wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) { + // Header parsing is complete + + // Assume PCM + this->channels_ = this->wav_decoder_->num_channels(); + this->sample_rate_ = this->wav_decoder_->sample_rate(); + this->sample_depth_ = this->wav_decoder_->bits_per_sample(); + + printf("sample channels: %d\n", this->channels_.value()); + printf("sample rate: %" PRId32 "\n", this->sample_rate_.value()); + // printf("number of samples: %d\n", + // this->wav_decoder_->chunk_bytes_left() / (this->channels_.value() * (this->bits_per_sample.value() + // / 8))); + this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left(); + header_finished = true; + } else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) { + // Continue parsing header + wav_bytes_to_skip = this->wav_decoder_->bytes_to_skip(); + wav_bytes_to_read = this->wav_decoder_->bytes_needed(); + } else { + printf("Unexpected error while parsing WAV header: %d\n", result); + return FileDecoderState::FAILED; + } + } else { + // Something unexpected has happened + // Reset state and hope we have enough info next time + this->input_buffer_length_ = original_buffer_length; + this->input_buffer_current_ = this->input_buffer_; + return FileDecoderState::POTENTIALLY_FAILED; + } + } + } + + if 
(this->wav_bytes_left_ > 0) { + size_t bytes_to_write = std::min(this->wav_bytes_left_, this->input_buffer_length_); + bytes_to_write = std::min(bytes_to_write, this->internal_buffer_size_); + if (bytes_to_write > 0) { + std::memcpy(this->output_buffer_, this->input_buffer_current_, bytes_to_write); + this->input_buffer_current_ += bytes_to_write; + this->input_buffer_length_ -= bytes_to_write; + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ = bytes_to_write; + this->wav_bytes_left_ -= bytes_to_write; + } + + return FileDecoderState::MORE_TO_PROCESS; + } + + return FileDecoderState::END_OF_FILE; + // return DecoderState::FINISHED; +} + +FileDecoderState AudioDecoder::decode_mp3_() { + // Look for the next sync word + int32_t offset = MP3FindSyncWord(this->input_buffer_current_, this->input_buffer_length_); + if (offset < 0) { + // We may recover if we have more data + return FileDecoderState::POTENTIALLY_FAILED; + } + + // Advance read pointer + this->input_buffer_current_ += offset; + this->input_buffer_length_ -= offset; + + int err = MP3Decode(this->mp3_decoder_, &this->input_buffer_current_, (int *) &this->input_buffer_length_, + (int16_t *) this->output_buffer_, 0); + if (err) { + switch (err) { + case ERR_MP3_MAINDATA_UNDERFLOW: + // Not a problem. Next call to decode will provide more data. 
+ return FileDecoderState::POTENTIALLY_FAILED; + break; + default: + // TODO: Better handle mp3 decoder errors + return FileDecoderState::FAILED; + break; + } + } else { + MP3FrameInfo mp3_frame_info; + MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info); + if (mp3_frame_info.outputSamps > 0) { + int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8); + this->output_buffer_length_ = mp3_frame_info.outputSamps * bytes_per_sample; + this->output_buffer_current_ = this->output_buffer_; + + this->sample_rate_ = mp3_frame_info.samprate; + this->channels_ = mp3_frame_info.nChans; + this->sample_depth_ = mp3_frame_info.bitsPerSample; + } + } + // } + return FileDecoderState::MORE_TO_PROCESS; +} + +FileDecoderState AudioDecoder::decode_flac_() { + if (!this->channels_.has_value()) { + // Header hasn't been read + auto result = this->flac_decoder_->read_header(this->input_buffer_length_); + + if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { + return FileDecoderState::POTENTIALLY_FAILED; + } + + size_t bytes_consumed = this->flac_decoder_->get_bytes_index(); + this->input_buffer_current_ += bytes_consumed; + this->input_buffer_length_ = this->flac_decoder_->get_bytes_left(); + + if (result != flac::FLAC_DECODER_SUCCESS) { + printf("failed to read flac header. 
Error: %d\n", result); + return FileDecoderState::FAILED; + } + + this->channels_ = this->flac_decoder_->get_num_channels(); + this->sample_rate_ = this->flac_decoder_->get_sample_rate(); + this->sample_depth_ = this->flac_decoder_->get_sample_depth(); + + size_t flac_decoder_output_buffer_min_size = flac_decoder_->get_output_buffer_size(); + if (this->internal_buffer_size_ < flac_decoder_output_buffer_min_size * sizeof(int16_t)) { + printf("output buffer is not big enough\n"); + return FileDecoderState::FAILED; + } + + return FileDecoderState::MORE_TO_PROCESS; + } + + uint32_t output_samples = 0; + auto result = + this->flac_decoder_->decode_frame(this->input_buffer_length_, (int16_t *) this->output_buffer_, &output_samples); + + if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { + // not an issue, just needs more data! + return FileDecoderState::POTENTIALLY_FAILED; + } else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) { + // Serious error, can't recover + return FileDecoderState::FAILED; + } + + // We have successfully decoded some input data and have new output data + size_t bytes_consumed = this->flac_decoder_->get_bytes_index(); + this->input_buffer_current_ += bytes_consumed; + this->input_buffer_length_ = this->flac_decoder_->get_bytes_left(); + + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ = output_samples * sizeof(int16_t); + + if (result == flac::FLAC_DECODER_NO_MORE_FRAMES) { + return FileDecoderState::END_OF_FILE; + } + + return FileDecoderState::MORE_TO_PROCESS; +} + +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/audio_decoder.h b/esphome/components/nabu/audio_decoder.h new file mode 100644 index 0000000..122b23f --- /dev/null +++ b/esphome/components/nabu/audio_decoder.h @@ -0,0 +1,79 @@ +#pragma once + +#ifdef USE_ESP_IDF + +#include "flac_decoder.h" +#include "wav_decoder.h" +#include "mp3_decoder.h" + +#include 
"esphome/components/media_player/media_player.h" +#include "esphome/core/ring_buffer.h" + +namespace esphome { +namespace nabu { + +enum class AudioDecoderState : uint8_t { + INITIALIZED = 0, + DECODING, + FINISHED, + FAILED, +}; + +// Only used within the AudioDecoder class; conveys the state of the particular file type decoder +enum class FileDecoderState : uint8_t { + MORE_TO_PROCESS, + IDLE, + POTENTIALLY_FAILED, + FAILED, + END_OF_FILE, +}; + +class AudioDecoder { + public: + AudioDecoder(esphome::RingBuffer *input_ring_buffer, esphome::RingBuffer *output_ring_buffer, size_t internal_buffer_size); + ~AudioDecoder(); + + void start(media_player::MediaFileType media_file_type); + + AudioDecoderState decode(bool stop_gracefully); + + const optional &get_channels() const { return this->channels_; } + const optional &get_sample_depth() const { return this->sample_depth_; } + const optional &get_sample_rate() const { return this->sample_rate_; } + + protected: + FileDecoderState decode_wav_(); + FileDecoderState decode_mp3_(); + FileDecoderState decode_flac_(); + + esphome::RingBuffer *input_ring_buffer_; + esphome::RingBuffer *output_ring_buffer_; + size_t internal_buffer_size_; + + uint8_t *input_buffer_; + uint8_t *input_buffer_current_; + size_t input_buffer_length_; + + uint8_t *output_buffer_; + uint8_t *output_buffer_current_; + size_t output_buffer_length_; + + HMP3Decoder mp3_decoder_; + + wav_decoder::WAVDecoder *wav_decoder_{nullptr}; + size_t wav_bytes_left_; + + flac::FLACDecoder *flac_decoder_{nullptr}; + + media_player::MediaFileType media_file_type_{media_player::MediaFileType::NONE}; + optional channels_; + optional sample_depth_; + optional sample_rate_; + + size_t potentially_failed_count_{0}; + bool end_of_file_{false}; +}; +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/combine_streamer.cpp b/esphome/components/nabu/audio_mixer.cpp similarity index 73% rename from 
esphome/components/nabu/combine_streamer.cpp rename to esphome/components/nabu/audio_mixer.cpp index 0096585..36d6536 100644 --- a/esphome/components/nabu/combine_streamer.cpp +++ b/esphome/components/nabu/audio_mixer.cpp @@ -1,6 +1,6 @@ #ifdef USE_ESP_IDF -#include "combine_streamer.h" +#include "audio_mixer.h" #include "esp_dsp.h" @@ -11,10 +11,10 @@ namespace esphome { namespace nabu { static const size_t INPUT_RING_BUFFER_SIZE = 32768; // Audio samples -static const size_t BUFFER_SIZE = 2048; // Audio samples - keep small for fast pausing +static const size_t BUFFER_SIZE = 4096; // Audio samples - keep small for fast pausing static const size_t QUEUE_COUNT = 20; -CombineStreamer::CombineStreamer() { +AudioMixer::AudioMixer() { this->media_ring_buffer_ = RingBuffer::create(INPUT_RING_BUFFER_SIZE); this->announcement_ring_buffer_ = RingBuffer::create(INPUT_RING_BUFFER_SIZE); this->output_ring_buffer_ = RingBuffer::create(BUFFER_SIZE); @@ -25,11 +25,14 @@ CombineStreamer::CombineStreamer() { return; } + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + this->stack_buffer_ = allocator.allocate(8192); + this->event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent)); this->command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent)); } -size_t CombineStreamer::write_media(uint8_t *buffer, size_t length) { +size_t AudioMixer::write_media(uint8_t *buffer, size_t length) { size_t free_bytes = this->media_free(); size_t bytes_to_write = std::min(length, free_bytes); if (bytes_to_write > 0) { @@ -38,7 +41,7 @@ size_t CombineStreamer::write_media(uint8_t *buffer, size_t length) { return 0; } -size_t CombineStreamer::write_announcement(uint8_t *buffer, size_t length) { +size_t AudioMixer::write_announcement(uint8_t *buffer, size_t length) { size_t free_bytes = this->announcement_free(); size_t bytes_to_write = std::min(length, free_bytes); @@ -48,20 +51,21 @@ size_t CombineStreamer::write_announcement(uint8_t *buffer, size_t length) { return 0; } 
-void CombineStreamer::start(const std::string &task_name, UBaseType_t priority) { +void AudioMixer::start(const std::string &task_name, UBaseType_t priority) { if (this->task_handle_ == nullptr) { - xTaskCreate(CombineStreamer::combine_task_, task_name.c_str(), 3072, (void *) this, priority, &this->task_handle_); + this->task_handle_ = xTaskCreateStatic(AudioMixer::mix_task_, task_name.c_str(), 3072, (void *) this, + priority, this->stack_buffer_, &this->task_stack_); } } -void CombineStreamer::reset_ring_buffers() { +void AudioMixer::reset_ring_buffers() { this->output_ring_buffer_->reset(); this->media_ring_buffer_->reset(); this->announcement_ring_buffer_->reset(); } -void CombineStreamer::combine_task_(void *params) { - CombineStreamer *this_combiner = (CombineStreamer *) params; +void AudioMixer::mix_task_(void *params) { + AudioMixer *this_mixer = (AudioMixer *) params; TaskEvent event; CommandEvent command_event; @@ -77,11 +81,11 @@ void CombineStreamer::combine_task_(void *params) { if ((media_buffer == nullptr) || (announcement_buffer == nullptr)) { event.type = EventType::WARNING; event.err = ESP_ERR_NO_MEM; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); + xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); event.type = EventType::STOPPED; event.err = ESP_OK; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); + xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); while (true) { delay(10); @@ -91,13 +95,13 @@ void CombineStreamer::combine_task_(void *params) { } event.type = EventType::STARTED; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); + xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); int16_t q15_ducking_ratio = (int16_t) (1 * std::pow(2, 15)); // esp-dsp using q15 fixed point numbers bool transfer_media = true; while (true) { - if (xQueueReceive(this_combiner->command_queue_, &command_event, (10 / portTICK_PERIOD_MS)) == pdTRUE) { + if 
(xQueueReceive(this_mixer->command_queue_, &command_event, (10 / portTICK_PERIOD_MS)) == pdTRUE) { if (command_event.command == CommandEventType::STOP) { break; } else if (command_event.command == CommandEventType::DUCK) { @@ -108,15 +112,15 @@ void CombineStreamer::combine_task_(void *params) { } else if (command_event.command == CommandEventType::RESUME_MEDIA) { transfer_media = true; } else if (command_event.command == CommandEventType::CLEAR_MEDIA) { - this_combiner->media_ring_buffer_->reset(); + this_mixer->media_ring_buffer_->reset(); } else if (command_event.command == CommandEventType::CLEAR_ANNOUNCEMENT) { - this_combiner->announcement_ring_buffer_->reset(); + this_mixer->announcement_ring_buffer_->reset(); } } - size_t media_available = this_combiner->media_ring_buffer_->available(); - size_t announcement_available = this_combiner->announcement_ring_buffer_->available(); - size_t output_free = this_combiner->output_ring_buffer_->free(); + size_t media_available = this_mixer->media_ring_buffer_->available(); + size_t announcement_available = this_mixer->announcement_ring_buffer_->available(); + size_t output_free = this_mixer->output_ring_buffer_->free(); if ((output_free > 0) && (media_available * transfer_media + announcement_available > 0)) { size_t bytes_to_read = std::min(output_free, BUFFER_SIZE); @@ -132,7 +136,7 @@ void CombineStreamer::combine_task_(void *params) { if (bytes_to_read > 0) { size_t media_bytes_read = 0; if (media_available * transfer_media > 0) { - media_bytes_read = this_combiner->media_ring_buffer_->read((void *) media_buffer, bytes_to_read, 0); + media_bytes_read = this_mixer->media_ring_buffer_->read((void *) media_buffer, bytes_to_read, 0); if (media_bytes_read > 0) { if (q15_ducking_ratio < (1 * std::pow(2, 15))) { dsps_mulc_s16_ae32(media_buffer, combination_buffer, media_bytes_read, q15_ducking_ratio, 1, 1); @@ -144,7 +148,7 @@ void CombineStreamer::combine_task_(void *params) { size_t announcement_bytes_read = 0; if 
(announcement_available > 0) { announcement_bytes_read = - this_combiner->announcement_ring_buffer_->read((void *) announcement_buffer, bytes_to_read, 0); + this_mixer->announcement_ring_buffer_->read((void *) announcement_buffer, bytes_to_read, 0); } size_t bytes_written = 0; @@ -177,36 +181,36 @@ void CombineStreamer::combine_task_(void *params) { // (input buffer 1, input buffer 2, output buffer, length, input buffer 1 step, input buffer 2 step, output // buffer step, bitshift) dsps_add_s16_aes3(media_buffer, announcement_buffer, combination_buffer, samples_read, 1, 1, 1, 0); - bytes_written = this_combiner->output_ring_buffer_->write((void *) combination_buffer, bytes_to_read); + bytes_written = this_mixer->output_ring_buffer_->write((void *) combination_buffer, bytes_to_read); } else if (media_bytes_read > 0) { - bytes_written = this_combiner->output_ring_buffer_->write((void *) media_buffer, media_bytes_read); + bytes_written = this_mixer->output_ring_buffer_->write((void *) media_buffer, media_bytes_read); } else if (announcement_bytes_read > 0) { bytes_written = - this_combiner->output_ring_buffer_->write((void *) announcement_buffer, announcement_bytes_read); + this_mixer->output_ring_buffer_->write((void *) announcement_buffer, announcement_bytes_read); } - if (bytes_written) { - event.type = EventType::RUNNING; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); - } else if (this_combiner->output_ring_buffer_->available() == 0) { - event.type = EventType::IDLE; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); - } + // if (bytes_written) { + // event.type = EventType::RUNNING; + // xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); + // } else if (this_mixer->output_ring_buffer_->available() == 0) { + // event.type = EventType::IDLE; + // xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); + // } } } } event.type = EventType::STOPPING; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); + 
xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); - this_combiner->reset_ring_buffers(); + this_mixer->reset_ring_buffers(); allocator.deallocate(media_buffer, BUFFER_SIZE); allocator.deallocate(announcement_buffer, BUFFER_SIZE); allocator.deallocate(combination_buffer, BUFFER_SIZE); event.type = EventType::STOPPED; - xQueueSend(this_combiner->event_queue_, &event, portMAX_DELAY); + xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY); while (true) { delay(10); diff --git a/esphome/components/nabu/streamer.h b/esphome/components/nabu/audio_mixer.h similarity index 54% rename from esphome/components/nabu/streamer.h rename to esphome/components/nabu/audio_mixer.h index 37daf6d..5d4f72b 100644 --- a/esphome/components/nabu/streamer.h +++ b/esphome/components/nabu/audio_mixer.h @@ -8,24 +8,12 @@ #include "esphome/core/helpers.h" #include "esphome/core/ring_buffer.h" -#include - #include #include namespace esphome { namespace nabu { -struct StreamInfo { - bool operator==(const StreamInfo &rhs) const { - return (channels == rhs.channels) && (bits_per_sample == rhs.bits_per_sample) && (sample_rate == rhs.sample_rate); - } - bool operator!=(const StreamInfo &rhs) const { return !operator==(rhs); } - uint8_t channels = 1; - uint8_t bits_per_sample = 16; - uint32_t sample_rate = 16000; -}; - enum class EventType : uint8_t { STARTING = 0, STARTED, @@ -36,24 +24,14 @@ enum class EventType : uint8_t { WARNING = 255, }; -// enum class MediaFileType : uint8_t { -// NONE = 0, -// WAV, -// MP3, -// FLAC, -// }; - struct TaskEvent { EventType type; esp_err_t err; - media_player::MediaFileType media_file_type; - StreamInfo stream_info; }; enum class CommandEventType : uint8_t { START, STOP, - STOP_GRACEFULLY, DUCK, PAUSE_MEDIA, RESUME_MEDIA, @@ -61,20 +39,15 @@ enum class CommandEventType : uint8_t { CLEAR_ANNOUNCEMENT, }; -enum class PipelineType : uint8_t { - MEDIA, - ANNOUNCEMENT, -}; - struct CommandEvent { CommandEventType command; float ducking_ratio = 0.0; - 
media_player::MediaFileType media_file_type = media_player::MediaFileType::NONE; - StreamInfo stream_info; }; -class OutputStreamer { +class AudioMixer { public: + AudioMixer(); + /// @brief Returns the number of bytes available to read from the ring buffer size_t available() { return this->output_ring_buffer_->available(); } @@ -86,6 +59,21 @@ class OutputStreamer { return xQueueReceive(this->event_queue_, event, ticks_to_wait); } + void start(const std::string &task_name, UBaseType_t priority = 1); + + void stop() { + vTaskDelete(this->task_handle_); + this->task_handle_ = nullptr; + + xQueueReset(this->event_queue_); + xQueueReset(this->command_queue_); + } + + void reset_ring_buffers(); + + size_t media_free() { return this->media_ring_buffer_->free(); } + size_t announcement_free() { return this->announcement_ring_buffer_->free(); } + /// @brief Reads from the output ring buffer /// @param buffer stores the read data /// @param length how many bytes requested to read from the ring buffer @@ -99,39 +87,37 @@ class OutputStreamer { return 0; } - virtual void reset_ring_buffers() { this->output_ring_buffer_->reset(); } + size_t write_media(uint8_t *buffer, size_t length); + size_t write_announcement(uint8_t *buffer, size_t length); + + BaseType_t read_media_event(TaskEvent *event, TickType_t ticks_to_wait = 0) { + return xQueueReceive(this->media_event_queue_, event, ticks_to_wait); + } + BaseType_t read_announcement_event(TaskEvent *event, TickType_t ticks_to_wait = 0) { + return xQueueReceive(this->announcement_event_queue_, event, ticks_to_wait); + } - virtual void start(const std::string &task_name, UBaseType_t priority = 1) = 0; - virtual void stop(); + RingBuffer *get_media_ring_buffer() { return this->media_ring_buffer_.get(); } + RingBuffer *get_announcement_ring_buffer() { return this->announcement_ring_buffer_.get(); } protected: TaskHandle_t task_handle_{nullptr}; + StaticTask_t task_stack_; + StackType_t *stack_buffer_{nullptr}; + std::unique_ptr 
/// Bits of the pipeline's FreeRTOS event group. COMMAND bits are requests sent to the
/// reader/decoder/resampler tasks; MESSAGE bits are status flags raised by those tasks.
enum EventGroupBits : uint32_t {
  PIPELINE_COMMAND_STOP = (1 << 0),  // Stops all activity in the pipeline elements

  READER_COMMAND_INIT_HTTP = (1 << 4),  // Read audio from an HTTP source
  READER_COMMAND_INIT_FILE = (1 << 5),  // Read audio from an audio file from the flash

  READER_MESSAGE_LOADED_MEDIA_TYPE = (1 << 6),  // Audio file type is read after checking it is supported
  READER_MESSAGE_FINISHED = (1 << 7),  // Reader is done (either through a failure or just end of the stream)
  READER_MESSAGE_ERROR = (1 << 8),     // Error reading the file

  DECODER_MESSAGE_LOADED_STREAM_INFO = (1 << 11),  // Decoder has determined the stream information
  DECODER_MESSAGE_FINISHED = (1 << 12),  // Decoder is done (either through a failure or the end of the stream)
  DECODER_MESSAGE_ERROR = (1 << 13),     // Error decoding the file

  RESAMPLER_MESSAGE_FINISHED = (1 << 17),  // Resampler is done (either through a failure or the end of the stream)
  RESAMPLER_MESSAGE_ERROR = (1 << 18),     // Error resampling the file

  // Mask of the low 20 bits, which covers every bit defined above. (A FreeRTOS event group
  // offers 24 usable bits when ticks are 32-bit; bits 20-23 are simply unused here.)
  ALL_BITS = 0xfffff,
};
this->current_media_file_ = media_file; + xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_FILE); +} + +void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t priority) { + if (this->read_task_handle_ == nullptr) { + this->read_task_handle_ = + xTaskCreateStatic(AudioPipeline::read_task_, (task_name + "_read").c_str(), 8192, (void *) this, priority, + this->read_task_stack_buffer_, &this->read_task_stack_); + } + if (this->decode_task_handle_ == nullptr) { + this->decode_task_handle_ = + xTaskCreateStatic(AudioPipeline::decode_task_, (task_name + "_decode").c_str(), 8192, (void *) this, priority, + this->decode_task_stack_buffer_, &this->decode_task_stack_); + } + if (this->resample_task_handle_ == nullptr) { + this->resample_task_handle_ = + xTaskCreateStatic(AudioPipeline::resample_task_, (task_name + "_resample").c_str(), 8192, (void *) this, + priority, this->resample_task_stack_buffer_, &this->resample_task_stack_); + } + + this->stop(); +} + +AudioPipelineState AudioPipeline::get_state() { + EventBits_t event_bits = xEventGroupGetBits(this->event_group_); + if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) { + return AudioPipelineState::STOPPED; + } else if (event_bits & (READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED)) { + return AudioPipelineState::STOPPED; + } + + return AudioPipelineState::PLAYING; +} + +void AudioPipeline::stop() { + xEventGroupSetBits(this->event_group_, PIPELINE_COMMAND_STOP); + + xEventGroupWaitBits( + this->event_group_, + (READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED), // Bit message to read + pdTRUE, // Clear the bit on exit + true, // Wait for all the bits, + pdMS_TO_TICKS(200)); // Block temporarily before deleting each task + + xEventGroupClearBits(this->event_group_, ALL_BITS); + this->reset_ring_buffers(); +} + +void AudioPipeline::reset_ring_buffers() { + 
/// @brief FreeRTOS task body for the reader stage; never returns.
///
/// Each outer-loop pass handles one playback request: it flags the reader as finished (idle),
/// blocks until an INIT_FILE or INIT_HTTP command bit is set, then reads the source until the
/// source ends, a read fails, or PIPELINE_COMMAND_STOP is set.
/// @param params the owning AudioPipeline, passed as the task parameter
void AudioPipeline::read_task_(void *params) {
  AudioPipeline *this_pipeline = (AudioPipeline *) params;

  while (true) {
    // Idle: advertise that the reader is not processing anything
    xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED);

    // Wait until the pipeline notifies us the source of the media file
    EventBits_t event_bits =
        xEventGroupWaitBits(this_pipeline->event_group_,
                            READER_COMMAND_INIT_FILE | READER_COMMAND_INIT_HTTP,  // Bit message to read
                            pdTRUE,                                               // Clear the bit on exit
                            pdFALSE,         // Return when either bit is set (do not wait for both)
                            portMAX_DELAY);  // Block indefinitely until bit is set

    // A request arrived, so the reader is busy again
    xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED);

    {
      // Scoped so the reader is destroyed at the end of every playback request
      AudioReader reader = AudioReader(this_pipeline->raw_file_ring_buffer_.get(), HTTP_BUFFER_SIZE);
      if (event_bits & READER_COMMAND_INIT_FILE) {
        this_pipeline->current_media_file_type_ = reader.start(this_pipeline->current_media_file_);
      } else {
        this_pipeline->current_media_file_type_ = reader.start(this_pipeline->current_uri_);
      }
      if (this_pipeline->current_media_file_type_ == media_player::MediaFileType::NONE) {
        // Couldn't load the file or it is an unknown type!
        xEventGroupSetBits(this_pipeline->event_group_,
                           EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
      } else {
        // Inform the decoder that the media type is available
        xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE);

        while (true) {
          event_bits = xEventGroupGetBits(this_pipeline->event_group_);

          // Abort as soon as a stop is requested by the pipeline or by another stage
          if (event_bits & PIPELINE_COMMAND_STOP) {
            break;
          }

          AudioReaderState reader_state = reader.read();

          if (reader_state == AudioReaderState::FINISHED) {
            break;
          } else if (reader_state == AudioReaderState::FAILED) {
            // A read failure also sets the stop bit, which the other stages poll
            xEventGroupSetBits(this_pipeline->event_group_,
                               EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
            break;
          }

          // Block to give other tasks opportunity to run
          delay(10);
        }
      }
    }
  }
}
/// @brief FreeRTOS task body for the resampler stage; never returns.
///
/// Each outer-loop pass handles one stream: it flags the resampler as finished (idle), blocks
/// until the decoder publishes the stream information, then resamples from the decoded ring
/// buffer into the mixer's media or announcement ring buffer until the resampler finishes,
/// fails, or PIPELINE_COMMAND_STOP is set.
/// @param params the owning AudioPipeline, passed as the task parameter
void AudioPipeline::resample_task_(void *params) {
  AudioPipeline *this_pipeline = (AudioPipeline *) params;

  while (true) {
    // Idle: advertise that the resampler is not processing anything
    xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::RESAMPLER_MESSAGE_FINISHED);

    // Wait until the decoder notifies us that the stream information is available
    EventBits_t event_bits = xEventGroupWaitBits(this_pipeline->event_group_,
                                                 DECODER_MESSAGE_LOADED_STREAM_INFO,  // Bit message to read
                                                 pdTRUE,                              // Clear the bit on exit
                                                 pdFALSE,         // Any awaited bit suffices (only one is awaited)
                                                 portMAX_DELAY);  // Block indefinitely until bit is set

    // Stream info arrived, so the resampler is busy again
    xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::RESAMPLER_MESSAGE_FINISHED);

    {
      RingBuffer *output_ring_buffer = nullptr;

      // The pipeline type selects which of the mixer's input ring buffers receives the audio
      if (this_pipeline->pipeline_type_ == AudioPipelineType::MEDIA) {
        output_ring_buffer = this_pipeline->mixer_->get_media_ring_buffer();
      } else {
        output_ring_buffer = this_pipeline->mixer_->get_announcement_ring_buffer();
      }

      // Scoped so the resampler and its buffers are destroyed at the end of every stream
      AudioResampler resampler =
          AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES);

      resampler.start(this_pipeline->current_stream_info_);

      while (true) {
        event_bits = xEventGroupGetBits(this_pipeline->event_group_);

        // Abort as soon as a stop is requested by the pipeline or by another stage
        if (event_bits & PIPELINE_COMMAND_STOP) {
          break;
        }

        // Stop gracefully if the decoder is done
        AudioResamplerState resampler_state = resampler.resample(event_bits & DECODER_MESSAGE_FINISHED);

        if (resampler_state == AudioResamplerState::FINISHED) {
          break;
        } else if (resampler_state == AudioResamplerState::FAILED) {
          // A resampling failure also sets the stop bit, which the other stages poll
          xEventGroupSetBits(this_pipeline->event_group_,
                             EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
          break;
        }

        // Block to give other tasks opportunity to run
        delay(10);
      }
    }
  }
}
common_start_(const std::string &task_name, UBaseType_t priority); + + AudioMixer *mixer_; + + std::string current_uri_{}; + media_player::MediaFile *current_media_file_{nullptr}; + + media_player::MediaFileType current_media_file_type_; + media_player::StreamInfo current_stream_info_; + + AudioPipelineType pipeline_type_; + + std::unique_ptr raw_file_ring_buffer_; + std::unique_ptr decoded_ring_buffer_; + std::unique_ptr resampled_ring_buffer_; + + EventGroupHandle_t event_group_; + + static void read_task_(void *params); + TaskHandle_t read_task_handle_{nullptr}; + StaticTask_t read_task_stack_; + StackType_t *read_task_stack_buffer_{nullptr}; + + static void decode_task_(void *params); + TaskHandle_t decode_task_handle_{nullptr}; + StaticTask_t decode_task_stack_; + StackType_t *decode_task_stack_buffer_{nullptr}; + + static void resample_task_(void *params); + TaskHandle_t resample_task_handle_{nullptr}; + StaticTask_t resample_task_stack_; + StackType_t *resample_task_stack_buffer_{nullptr}; +}; + +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp new file mode 100644 index 0000000..915a02f --- /dev/null +++ b/esphome/components/nabu/audio_reader.cpp @@ -0,0 +1,152 @@ +#ifdef USE_ESP_IDF + +#include "audio_reader.h" + +#include "esphome/core/ring_buffer.h" + +namespace esphome { +namespace nabu { + +AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) { + this->output_ring_buffer_ = output_ring_buffer; + + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + this->transfer_buffer_ = allocator.allocate(transfer_buffer_size); + this->transfer_buffer_size_ = transfer_buffer_size; +} + +AudioReader::~AudioReader() { + if (this->transfer_buffer_ != nullptr) { + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + allocator.deallocate(this->transfer_buffer_, 
this->transfer_buffer_size_); + } + + this->cleanup_connection_(); +} + +media_player::MediaFileType AudioReader::start(media_player::MediaFile *media_file) { + this->current_media_file_ = media_file; + + this->media_file_data_current_ = media_file->data; + this->media_file_bytes_left_ = media_file->length; + + return media_file->file_type; +} + +media_player::MediaFileType AudioReader::start(const std::string &uri) { + this->cleanup_connection_(); + + if (uri.empty()) { + return media_player::MediaFileType::NONE; + } + + esp_http_client_config_t config = { + .url = uri.c_str(), + .cert_pem = nullptr, + .disable_auto_redirect = false, + .max_redirection_count = 10, + }; + this->client_ = esp_http_client_init(&config); + + if (this->client_ == nullptr) { + printf("Failed to initialize HTTP connection"); + return media_player::MediaFileType::NONE; + } + + esp_err_t err; + if ((err = esp_http_client_open(this->client_, 0)) != ESP_OK) { + printf("Failed to open HTTP connection"); + this->cleanup_connection_(); + return media_player::MediaFileType::NONE; + } + + int content_length = esp_http_client_fetch_headers(this->client_); + + // TODO: Figure out how to handle this better! 
Music Assistant streams don't send a content length + // if (content_length <= 0) { + // printf("Fialed to get content length"); + // this->cleanup_connection_(client); + // return media_player::MediaFileType::NONE; + // } + + char url[500]; + if (esp_http_client_get_url(this->client_, url, 500) != ESP_OK) { + this->cleanup_connection_(); + return media_player::MediaFileType::NONE; + } + + std::string url_string = url; + + if (str_endswith(url_string, ".wav")) { + return media_player::MediaFileType::WAV; + } else if (str_endswith(url_string, ".mp3")) { + return media_player::MediaFileType::MP3; + } else if (str_endswith(url_string, ".flac")) { + return media_player::MediaFileType::FLAC; + } + + return media_player::MediaFileType::NONE; +} + +AudioReaderState AudioReader::read() { + if (this->client_ != nullptr) { + return this->http_read_(); + } else if (this->current_media_file_ != nullptr) { + return this->file_read_(); + } + + return AudioReaderState::INITIALIZED; +} + +AudioReaderState AudioReader::file_read_() { + if (this->media_file_bytes_left_ > 0) { + size_t bytes_to_write = std::min(this->media_file_bytes_left_, this->output_ring_buffer_->free()); + + if (bytes_to_write == 0) { + return AudioReaderState::READING; + } + + size_t bytes_written = this->output_ring_buffer_->write((void *) this->media_file_data_current_, bytes_to_write); + this->media_file_bytes_left_ -= bytes_written; + this->media_file_data_current_ += bytes_written; + + return AudioReaderState::READING; + } + return AudioReaderState::FINISHED; +} + +AudioReaderState AudioReader::http_read_() { + size_t bytes_to_read = this->output_ring_buffer_->free(); + + if (bytes_to_read == 0) { + return AudioReaderState::READING; + } + + int received_len = esp_http_client_read(this->client_, (char *) this->transfer_buffer_, bytes_to_read); + + if (received_len > 0) { + this->output_ring_buffer_->write((void *) this->transfer_buffer_, received_len); + } else if (received_len < 0) { + // TODO: Error 
situation. Should we mark failed..? + } + + if (esp_http_client_is_complete_data_received(this->client_)) { + this->cleanup_connection_(); + return AudioReaderState::FINISHED; + } + + return AudioReaderState::READING; +} + +void AudioReader::cleanup_connection_() { + if (this->client_ != nullptr) { + esp_http_client_close(this->client_); + esp_http_client_cleanup(this->client_); + this->client_ = nullptr; + } +} + +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/audio_reader.h b/esphome/components/nabu/audio_reader.h new file mode 100644 index 0000000..d6a2176 --- /dev/null +++ b/esphome/components/nabu/audio_reader.h @@ -0,0 +1,49 @@ +#pragma once + +#ifdef USE_ESP_IDF + +#include "esphome/components/media_player/media_player.h" +#include "esphome/core/ring_buffer.h" + +#include + +namespace esphome { +namespace nabu { + +enum class AudioReaderState : uint8_t { + INITIALIZED = 0, + READING, + FINISHED, + FAILED, +}; + +class AudioReader { + public: + AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size); + ~AudioReader(); + + media_player::MediaFileType start(const std::string &uri); + media_player::MediaFileType start(media_player::MediaFile *media_file); + + AudioReaderState read(); + + protected: + AudioReaderState file_read_(); + AudioReaderState http_read_(); + + void cleanup_connection_(); + + esphome::RingBuffer *output_ring_buffer_; + uint8_t *transfer_buffer_; + size_t transfer_buffer_size_; + + esp_http_client_handle_t client_{nullptr}; + + media_player::MediaFile *current_media_file_{nullptr}; + size_t media_file_bytes_left_; + const uint8_t *media_file_data_current_; +}; +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp new file mode 100644 index 0000000..4860c28 --- /dev/null +++ 
b/esphome/components/nabu/audio_resampler.cpp @@ -0,0 +1,373 @@ +#ifdef USE_ESP_IDF + +#include "audio_resampler.h" + +#include "esphome/core/ring_buffer.h" + +namespace esphome { +namespace nabu { + +static const size_t NUM_TAPS = 32; +static const size_t NUM_FILTERS = 32; +static const bool USE_PRE_POST_FILTER = true; + +AudioResampler::AudioResampler(RingBuffer *input_ring_buffer, RingBuffer *output_ring_buffer, + size_t internal_buffer_samples) { + this->input_ring_buffer_ = input_ring_buffer; + this->output_ring_buffer_ = output_ring_buffer; + this->internal_buffer_samples_ = internal_buffer_samples; + + ExternalRAMAllocator int16_allocator(ExternalRAMAllocator::ALLOW_FAILURE); + this->input_buffer_ = int16_allocator.allocate(internal_buffer_samples); + this->output_buffer_ = int16_allocator.allocate(internal_buffer_samples); + + ExternalRAMAllocator float_allocator(ExternalRAMAllocator::ALLOW_FAILURE); + this->float_input_buffer_ = float_allocator.allocate(internal_buffer_samples); + this->float_output_buffer_ = float_allocator.allocate(internal_buffer_samples); +} + +AudioResampler::~AudioResampler() { + ExternalRAMAllocator int16_allocator(ExternalRAMAllocator::ALLOW_FAILURE); + ExternalRAMAllocator float_allocator(ExternalRAMAllocator::ALLOW_FAILURE); + + if (this->input_buffer_ != nullptr) { + int16_allocator.deallocate(this->input_buffer_, this->internal_buffer_samples_); + } + if (this->output_buffer_ != nullptr) { + int16_allocator.deallocate(this->output_buffer_, this->internal_buffer_samples_); + } + if (this->float_input_buffer_ != nullptr) { + float_allocator.deallocate(this->float_input_buffer_, this->internal_buffer_samples_); + } + if (this->float_output_buffer_ != nullptr) { + float_allocator.deallocate(this->float_output_buffer_, this->internal_buffer_samples_); + } + if (this->resampler_ != nullptr) { + resampleFree(this->resampler_); + this->resampler_ = nullptr; + } +} + +void AudioResampler::start(media_player::StreamInfo &stream_info) { + 
this->stream_info_ = stream_info; + + this->input_buffer_current_ = this->input_buffer_; + this->input_buffer_length_ = 0; + this->float_input_buffer_current_ = this->float_input_buffer_; + this->float_input_buffer_length_ = 0; + + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ = 0; + this->float_output_buffer_current_ = this->float_output_buffer_; + this->float_output_buffer_length_ = 0; + + this->needs_mono_to_stereo_ = (stream_info.channels != 2); + + if (stream_info.channels > 0) { + this->channel_factor_ = 2 / stream_info.channels; + printf("Converting %d channels to 2 channels\n", stream_info.channels); + } + constexpr float resample_rate = 16000.0f; + if (stream_info.sample_rate != 16000) { + if (stream_info.sample_rate == 48000) { + // Special case, we can do this a lot faster with esp-dsp code! + const uint8_t decimation = 48000 / 16000; + const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1); + + int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH, + (float) 0.5 / decimation); + // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH); + dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH, + decimation, fir_out_offset, -shift); + this->decimation_filter_ = true; + this->needs_resampling_ = true; + // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t)); + } else { + int flags = 0; + + this->needs_resampling_ = true; + + this->sample_ratio_ = resample_rate / static_cast(stream_info.sample_rate); + + printf("Resampling from %d Hz to 16000 Hz\n", stream_info.sample_rate); + + if (this->sample_ratio_ < 1.0) { + this->lowpass_ratio_ -= (10.24 / 16); + + if (this->lowpass_ratio_ < 0.84) { + this->lowpass_ratio_ = 0.84; + } + + if (this->lowpass_ratio_ < this->sample_ratio_) { + // avoid discontinuities near unity sample ratios + this->lowpass_ratio_ = 
this->sample_ratio_; + } + } + if (this->lowpass_ratio_ * this->sample_ratio_ < 0.98 && USE_PRE_POST_FILTER) { + float cutoff = this->lowpass_ratio_ * this->sample_ratio_ / 2.0; + biquad_lowpass(&this->lowpass_coeff_, cutoff); + this->pre_filter_ = true; + } + + if (this->lowpass_ratio_ / this->sample_ratio_ < 0.98 && USE_PRE_POST_FILTER && !this->pre_filter_) { + float cutoff = this->lowpass_ratio_ / this->sample_ratio_ / 2.0; + biquad_lowpass(&this->lowpass_coeff_, cutoff); + this->post_filter_ = true; + } + + if (this->pre_filter_ || this->post_filter_) { + for (int i = 0; i < stream_info.channels; ++i) { + biquad_init(&this->lowpass_[i][0], &this->lowpass_coeff_, 1.0); + biquad_init(&this->lowpass_[i][1], &this->lowpass_coeff_, 1.0); + } + } + + if (this->sample_ratio_ < 1.0) { + this->resampler_ = resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, + this->sample_ratio_ * this->lowpass_ratio_, flags | INCLUDE_LOWPASS); + } else if (this->lowpass_ratio_ < 1.0) { + this->resampler_ = + resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, this->lowpass_ratio_, flags | INCLUDE_LOWPASS); + } else { + this->resampler_ = resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, 1.0, flags); + } + + resampleAdvancePosition(this->resampler_, NUM_TAPS / 2.0); + } + } else { + this->needs_resampling_ = false; + } +} + +AudioResamplerState AudioResampler::resample(bool stop_gracefully) { + if (stop_gracefully) { + if ((this->input_ring_buffer_->available() == 0) && (this->output_ring_buffer_->available() == 0) && + (this->input_buffer_length_ == 0) && (this->output_buffer_length_ == 0)) { + return AudioResamplerState::FINISHED; + } + } + + if (this->output_buffer_length_ > 0) { + size_t bytes_free = this->output_ring_buffer_->free(); + size_t bytes_to_write = std::min(this->output_buffer_length_, bytes_free); + + if (bytes_to_write > 0) { + size_t bytes_written = this->output_ring_buffer_->write((void *) this->output_buffer_current_, bytes_to_write); + + 
this->output_buffer_current_ += bytes_written / sizeof(int16_t); + this->output_buffer_length_ -= bytes_written; + } + + return AudioResamplerState::RESAMPLING; + } + + ////// + // Refill input buffer + ////// + + // Move old data to the start of the buffer + if (this->input_buffer_length_ > 0) { + memmove((void *) this->input_buffer_, (void *) this->input_buffer_current_, this->input_buffer_length_); + } + this->input_buffer_current_ = this->input_buffer_; + + // Copy new data to the end of the of the buffer + size_t bytes_available = this->input_ring_buffer_->available(); + size_t bytes_to_read = + std::min(bytes_available, this->internal_buffer_samples_ * sizeof(int16_t) - this->input_buffer_length_); + + if (bytes_to_read > 0) { + int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t); + size_t bytes_read = this->input_ring_buffer_->read((void *) new_input_buffer_data, bytes_to_read); + + this->input_buffer_length_ += bytes_read; + } + + if (this->needs_resampling_) { + if (this->decimation_filter_) { + if (this->needs_mono_to_stereo_) { + if (this->input_buffer_length_ > 0) { + size_t available_samples = this->input_buffer_length_ / sizeof(int16_t); + + if (available_samples / 3 == 0) { + this->input_buffer_current_ = this->input_buffer_; + this->input_buffer_length_ = 0; + } else { + dsps_fird_s16_aes3(&this->fir_filter_, this->input_buffer_current_, this->output_buffer_, + available_samples / 3); + + size_t output_samples = available_samples / 3; + + this->input_buffer_current_ += output_samples * 3; + this->input_buffer_length_ -= output_samples * 3 * sizeof(int16_t); + + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ += output_samples * sizeof(int16_t); + } + } + } else { + // Interleaved stereo samples + // TODO: This doesn't sound correct! 
I need to use separate filters for each channel so the delay line isn't mixed + size_t available_samples = this->input_buffer_length_ / sizeof(int16_t); + for (int i = 0; i < available_samples / 2; ++i) { + // split interleaved samples into two separate streams + this->output_buffer_[i] = this->input_buffer_[2 * i]; + this->output_buffer_[i + available_samples / 2] = this->input_buffer_[2 * i + 1]; + } + std::memcpy(this->input_buffer_, this->output_buffer_, available_samples * sizeof(int16_t)); + dsps_fird_s16_aes3(&this->fir_filter_, this->input_buffer_, this->output_buffer_, (available_samples / 3) / 2); + dsps_fird_s16_aes3(&this->fir_filter_, this->input_buffer_ + available_samples / 2, + this->output_buffer_ + (available_samples / 3) / 2, (available_samples / 3) / 2); + std::memcpy(this->input_buffer_, this->output_buffer_, available_samples * sizeof(int16_t)); + for (int i = 0; i < available_samples / 2; ++i) { + this->output_buffer_[2 * i] = this->input_buffer_[i]; + this->output_buffer_[2 * i + 1] = this->input_buffer_[available_samples / 2 + i]; + } + + size_t output_samples = available_samples / 3; + + this->input_buffer_current_ += output_samples * 3; + this->input_buffer_length_ -= output_samples * 3 * sizeof(int16_t); + + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ += output_samples * sizeof(int16_t); + } + } else { + if (this->input_buffer_length_ > 0) { + // Samples are indiviudal int16 values. Frames include 1 sample for mono and 2 samples for stereo + // Be careful converting between bytes, samples, and frames! + // 1 sample = 2 bytes = sizeof(int16_t) + // if mono: + // 1 frame = 1 sample + // if stereo: + // 1 frame = 2 samples (left and right) + + size_t samples_read = this->input_buffer_length_ / sizeof(int16_t); + + // This is inefficient! 
It reconverts any samples that weren't used in the previous resampling run + for (int i = 0; i < samples_read; ++i) { + this->float_input_buffer_[i] = static_cast(this->input_buffer_[i]) / 32768.0f; + } + + size_t frames_read = samples_read / this->stream_info_.channels; + + // The low pass filter seems to be causing glitches... probably because samples are repeated due to the above + // ineffeciency! + if (this->pre_filter_) { + for (int i = 0; i < this->stream_info_.channels; ++i) { + biquad_apply_buffer(&this->lowpass_[i][0], this->float_input_buffer_ + i, frames_read, + this->stream_info_.channels); + biquad_apply_buffer(&this->lowpass_[i][1], this->float_input_buffer_ + i, frames_read, + this->stream_info_.channels); + } + } + + ResampleResult res; + + res = resampleProcessInterleaved(this->resampler_, this->float_input_buffer_, frames_read, + this->float_output_buffer_, + this->internal_buffer_samples_ / this->channel_factor_, this->sample_ratio_); + + size_t frames_used = res.input_used; + size_t samples_used = frames_used * this->stream_info_.channels; + + size_t frames_generated = res.output_generated; + if (this->post_filter_) { + for (int i = 0; i < this->stream_info_.channels; ++i) { + biquad_apply_buffer(&this->lowpass_[i][0], this->float_output_buffer_ + i, frames_generated, + this->stream_info_.channels); + biquad_apply_buffer(&this->lowpass_[i][1], this->float_output_buffer_ + i, frames_generated, + this->stream_info_.channels); + } + } + + size_t samples_generated = frames_generated * this->stream_info_.channels; + + for (int i = 0; i < samples_generated; ++i) { + this->output_buffer_[i] = static_cast(this->float_output_buffer_[i] * 32767); + } + + this->input_buffer_current_ += samples_used; + this->input_buffer_length_ -= samples_used * sizeof(int16_t); + + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ += samples_generated * sizeof(int16_t); + } + } + } else { + size_t bytes_to_transfer = + 
std::min(this->internal_buffer_samples_ / this->channel_factor_ * sizeof(int16_t), this->input_buffer_length_); + std::memcpy((void *) this->output_buffer_, (void *) this->input_buffer_current_, bytes_to_transfer); + + this->input_buffer_current_ += bytes_to_transfer / sizeof(int16_t); + this->input_buffer_length_ -= bytes_to_transfer; + + this->output_buffer_current_ = this->output_buffer_; + this->output_buffer_length_ += bytes_to_transfer; + } + + if (this->needs_mono_to_stereo_) { + // Convert mono to stereo + for (int i = this->output_buffer_length_ / (sizeof(int16_t)) - 1; i >= 0; --i) { + this->output_buffer_[2 * i] = this->output_buffer_[i]; + this->output_buffer_[2 * i + 1] = this->output_buffer_[i]; + } + + this->output_buffer_length_ *= 2; // double the bytes for stereo samples + } + return AudioResamplerState::RESAMPLING; +} + +int16_t AudioResampler::float_to_q15_(float q, uint32_t shift) { return (int16_t) (q * pow(2, 15 + shift)); } + +int8_t AudioResampler::generate_q15_fir_coefficients_(int16_t *fir_coeffs, const unsigned int fir_len, const float ft) { + // Even or odd length of the FIR filter + const bool is_odd = (fir_len % 2) ? 
(true) : (false); + const float fir_order = (float) (fir_len - 1); + + // Window coefficients + float *fir_window = (float *) malloc(fir_len * sizeof(float)); + dsps_wind_blackman_harris_f32(fir_window, fir_len); + + float *float_coeffs = (float *) malloc(fir_len * sizeof(float)); + + float max_coeff = 0.0; + float min_coeff = 0.0; + for (int i = 0; i < fir_len; i++) { + if ((i == fir_order / 2) && (is_odd)) { + float_coeffs[i] = 2 * ft; + } else { + float_coeffs[i] = sinf((2 * M_PI * ft * (i - fir_order / 2))) / (M_PI * (i - fir_order / 2)); + } + + float_coeffs[i] *= fir_window[i]; + if (float_coeffs[i] > max_coeff) { + max_coeff = float_coeffs[i]; + } + if (float_coeffs[i] < min_coeff) { + min_coeff = float_coeffs[i]; + } + } + + float max_abs_coeffs = fmaxf(fabsf(min_coeff), max_coeff); + + int32_t shift = 0; + for (int i = 1; i < 15; ++i) { + if (max_abs_coeffs < pow(2, -i)) { + ++shift; + } + } + + for (int i = 0; i < fir_len; ++i) { + fir_coeffs[i] = float_to_q15_(float_coeffs[i], shift); + } + + free(fir_window); + + return shift; +} + +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/audio_resampler.h b/esphome/components/nabu/audio_resampler.h new file mode 100644 index 0000000..1176c1a --- /dev/null +++ b/esphome/components/nabu/audio_resampler.h @@ -0,0 +1,89 @@ +#pragma once + +#ifdef USE_ESP_IDF + +#include "audio_pipeline.h" + +#include "biquad.h" +#include "resampler.h" + +#include "esp_dsp.h" + +#include "esphome/components/media_player/media_player.h" +#include "esphome/core/ring_buffer.h" + +namespace esphome { +namespace nabu { + +static const uint32_t FIR_FILTER_LENGTH = 96; + +enum class AudioResamplerState : uint8_t { + INITIALIZED = 0, + RESAMPLING, + FINISHED, + FAILED, +}; + +class AudioResampler { + public: + AudioResampler(esphome::RingBuffer *input_ring_buffer, esphome::RingBuffer *output_ring_buffer, + size_t internal_buffer_samples); + ~AudioResampler(); + + void 
start(media_player::StreamInfo &stream_info); + + AudioResamplerState resample(bool stop_gracefully); + + protected: + esphome::RingBuffer *input_ring_buffer_; + esphome::RingBuffer *output_ring_buffer_; + size_t internal_buffer_samples_; + + int16_t *input_buffer_; + int16_t *input_buffer_current_; + size_t input_buffer_length_; + + int16_t *output_buffer_; + int16_t *output_buffer_current_; + size_t output_buffer_length_; + + float *float_input_buffer_; + float *float_input_buffer_current_; + size_t float_input_buffer_length_; + + float *float_output_buffer_; + float *float_output_buffer_current_; + size_t float_output_buffer_length_; + + media_player::StreamInfo stream_info_; + bool needs_resampling_{false}; + bool needs_mono_to_stereo_{false}; + + Resample *resampler_{nullptr}; + + Biquad lowpass_[2][2]; + BiquadCoefficients lowpass_coeff_; + + float sample_ratio_{1.0}; + float lowpass_ratio_{1.0}; + uint8_t channel_factor_{1}; + + bool pre_filter_{false}; + bool post_filter_{false}; + + // The following is used to create faster decimation filter when we resample from 48 kHz to 16 kHz + // TODO: There seems to be some aliasing still... 
+ fir_s16_t fir_filter_; + + bool decimation_filter_{false}; + alignas(16) int16_t fir_filter_coeffecients_[FIR_FILTER_LENGTH]; + alignas(16) int16_t fir_delay_[FIR_FILTER_LENGTH]; + + int16_t float_to_q15_(float q, uint32_t shift); + int8_t generate_q15_fir_coefficients_(int16_t *fir_coeffs, const unsigned int fir_len, const float ft); + +}; +} // namespace nabu +} // namespace esphome + +#endif \ No newline at end of file diff --git a/esphome/components/nabu/biquad.h b/esphome/components/nabu/biquad.h index 891cacd..804a786 100644 --- a/esphome/components/nabu/biquad.h +++ b/esphome/components/nabu/biquad.h @@ -1,3 +1,4 @@ +#pragma once //////////////////////////////////////////////////////////////////////////// // **** BIQUAD **** // // Simple Biquad Filter Library // diff --git a/esphome/components/nabu/combine_streamer.h b/esphome/components/nabu/combine_streamer.h deleted file mode 100644 index 3c94d1f..0000000 --- a/esphome/components/nabu/combine_streamer.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#ifdef USE_ESP_IDF - -#include "streamer.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" -#include "esphome/core/ring_buffer.h" - -#include -#include - -namespace esphome { -namespace nabu { - -class CombineStreamer : public OutputStreamer { - public: - CombineStreamer(); - - void start(const std::string &task_name, UBaseType_t priority = 1) override; - // void stop() override; - void reset_ring_buffers() override; - - size_t media_free() { return this->media_ring_buffer_->free(); } - size_t announcement_free() { return this->announcement_ring_buffer_->free(); } - - size_t write_media(uint8_t *buffer, size_t length); - size_t write_announcement(uint8_t *buffer, size_t length); - - BaseType_t read_media_event(TaskEvent *event, TickType_t ticks_to_wait = 0) { - return xQueueReceive(this->media_event_queue_, event, ticks_to_wait); - } - BaseType_t read_announcement_event(TaskEvent *event, TickType_t ticks_to_wait = 0) { - return 
xQueueReceive(this->announcement_event_queue_, event, ticks_to_wait); - } - - protected: - static void combine_task_(void *params); - - std::unique_ptr media_ring_buffer_; - std::unique_ptr announcement_ring_buffer_; - - QueueHandle_t media_event_queue_; - QueueHandle_t announcement_event_queue_; -}; -} // namespace nabu -} // namespace esphome - -#endif \ No newline at end of file diff --git a/esphome/components/nabu/decode_streamer.cpp b/esphome/components/nabu/decode_streamer.cpp deleted file mode 100644 index e2e72f9..0000000 --- a/esphome/components/nabu/decode_streamer.cpp +++ /dev/null @@ -1,399 +0,0 @@ -#ifdef USE_ESP_IDF - -#include "decode_streamer.h" - -#include "flac_decoder.h" -#include "mp3_decoder.h" -#include "wav_decoder.h" -#include "streamer.h" - -#include "esphome/components/media_player/media_player.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" - -namespace esphome { -namespace nabu { - -static const size_t BUFFER_SIZE = 32768; // Audio samples -static const size_t QUEUE_COUNT = 20; - -DecodeStreamer::DecodeStreamer() { - this->input_ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t)); - this->output_ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t)); - - this->event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent)); - this->command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent)); - - // TODO: Handle if this fails to allocate - if ((this->input_ring_buffer_) || (this->output_ring_buffer_ == nullptr)) { - return; - } -} - -void DecodeStreamer::start(const std::string &task_name, UBaseType_t priority) { - if (this->task_handle_ == nullptr) { - xTaskCreate(DecodeStreamer::decode_task_, task_name.c_str(), 3072, (void *) this, priority, &this->task_handle_); - } -} - -size_t DecodeStreamer::write(uint8_t *buffer, size_t length) { - size_t free_bytes = this->input_ring_buffer_->free(); - size_t bytes_to_write = std::min(length, free_bytes); - if (bytes_to_write > 0) { - return 
this->input_ring_buffer_->write((void *) buffer, bytes_to_write); - } - return 0; -} - -void DecodeStreamer::decode_task_(void *params) { - DecodeStreamer *this_streamer = (DecodeStreamer *) params; - - TaskEvent event; - CommandEvent command_event; - - ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); - uint8_t *input_buffer = allocator.allocate(BUFFER_SIZE); - uint8_t *output_buffer = allocator.allocate(BUFFER_SIZE); - - uint8_t *input_buffer_current = input_buffer; - uint8_t *output_buffer_current = output_buffer; - - size_t input_buffer_length = 0; - size_t output_buffer_length = 0; - - if ((input_buffer == nullptr) || (output_buffer == nullptr)) { - event.type = EventType::WARNING; - event.err = ESP_ERR_NO_MEM; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - event.type = EventType::STOPPED; - event.err = ESP_OK; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } - - return; - } - - // event.type = EventType::STARTED; - // xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - media_player::MediaFileType media_file_type = media_player::MediaFileType::NONE; - - wav_decoder::WAVDecoder wav_decoder(input_buffer); - size_t wav_header_bytes_to_read = wav_decoder.bytes_needed(); - size_t wav_buffer_offset = 0; - size_t wav_bytes_to_skip = wav_decoder.bytes_to_skip(); - size_t wav_sample_bytes_to_read = 0; - - // TODO: only initialize if needed - HMP3Decoder mp3_decoder = MP3InitDecoder(); - MP3FrameInfo mp3_frame_info; - - flac::FLACDecoder flac_decoder = - flac::FLACDecoder(input_buffer, BUFFER_SIZE, BUFFER_SIZE / 8, this_streamer->input_ring_buffer_.get()); - size_t flac_decoder_output_buffer_min_size = 0; - - bool stopping = false; - bool header_parsed = false; - - StreamInfo stream_info; - - while (true) { - if (xQueueReceive(this_streamer->command_queue_, &command_event, (0 / portTICK_PERIOD_MS)) == pdTRUE) { - if (command_event.command == 
CommandEventType::START) { - if ((media_file_type == media_player::MediaFileType::NONE) || (media_file_type == media_player::MediaFileType::MP3)) { - MP3FreeDecoder(mp3_decoder); - } - - media_file_type = command_event.media_file_type; - - // Set to nonsense... the decoder should update when the header is analyzed - stream_info.channels = 0; - - // Reset state of everything - memset((void *) input_buffer, 0, BUFFER_SIZE); - memset((void *) output_buffer, 0, BUFFER_SIZE); - - input_buffer_length = 0; - output_buffer_length = 0; - input_buffer_current = input_buffer; - output_buffer_current = output_buffer; - - stopping = false; - header_parsed = false; - - wav_decoder.reset(); - wav_header_bytes_to_read = wav_decoder.bytes_needed(); - wav_buffer_offset = 0; - wav_bytes_to_skip = wav_decoder.bytes_to_skip(); - wav_sample_bytes_to_read = 0; - - if (media_file_type == media_player::MediaFileType::MP3) { - mp3_decoder = MP3InitDecoder(); - } - - flac_decoder_output_buffer_min_size = 0; - } else if (command_event.command == CommandEventType::STOP) { - break; - } else if (command_event.command == CommandEventType::STOP_GRACEFULLY) { - stopping = true; - } - } - - if (media_file_type == media_player::MediaFileType::NONE) { - vTaskDelay(10 / portTICK_PERIOD_MS); - continue; - } - - if (output_buffer_length > 0) { - size_t bytes_free = this_streamer->output_ring_buffer_->free(); - size_t bytes_to_write = std::min(output_buffer_length, bytes_free); - - if (bytes_to_write > 0) { - size_t bytes_written = - this_streamer->output_ring_buffer_->write((void *) output_buffer_current, bytes_to_write); - - output_buffer_length -= bytes_written; - output_buffer_current += bytes_written; - } - } else { - if (media_file_type == media_player::MediaFileType::WAV) { - size_t bytes_available = this_streamer->input_ring_buffer_->available(); - size_t bytes_free = this_streamer->output_ring_buffer_->free(); - size_t max_bytes_to_read = std::min(bytes_free, bytes_available); - - size_t 
bytes_read = 0; - - if (!header_parsed && (bytes_available > 0)) { - if (wav_bytes_to_skip > 0) { - // Skip unneeded data - bytes_read = this_streamer->input_ring_buffer_->read((void *) input_buffer, - std::min(wav_bytes_to_skip, max_bytes_to_read)); - wav_bytes_to_skip -= bytes_read; - } else if (wav_header_bytes_to_read > 0) { - // Read needed header data - bytes_read = this_streamer->input_ring_buffer_->read((void *) (input_buffer + wav_buffer_offset), - wav_header_bytes_to_read); - wav_header_bytes_to_read -= bytes_read; - wav_buffer_offset += bytes_read; - - if (wav_header_bytes_to_read == 0) { - // Process header data in buffer - wav_decoder::WAVDecoderResult result = wav_decoder.next(); - if (result == wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) { - // Header parsing is complete - header_parsed = true; - wav_sample_bytes_to_read = wav_decoder.chunk_bytes_left(); - - StreamInfo old_stream_info = stream_info; - - // Assume PCM and 16-bits per sample - stream_info.channels = wav_decoder.num_channels(); - stream_info.sample_rate = wav_decoder.sample_rate(); - - printf("sample channels: %d\n", stream_info.channels); - printf("sample rate: %" PRId32 "\n", stream_info.sample_rate); - printf("number of samples: %d\n", wav_sample_bytes_to_read / - (wav_decoder.num_channels() * (wav_decoder.bits_per_sample() / 8))); - - if (stream_info != old_stream_info) { - this_streamer->output_ring_buffer_->reset(); - - event.type = EventType::STARTED; - event.media_file_type = media_file_type; - event.stream_info = stream_info; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } - } else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) { - // Continue parsing header - wav_bytes_to_skip = wav_decoder.bytes_to_skip(); - wav_header_bytes_to_read = wav_decoder.bytes_needed(); - wav_buffer_offset = 0; - } else { - printf("Unexpected error while parsing WAV header: %d\n", result); - break; - } // parsing state - } // if header bytes available - } // if header bytes 
needed - } // if header parsed - - if (header_parsed && (wav_sample_bytes_to_read > 0)) { - size_t bytes_to_read = std::min(max_bytes_to_read, BUFFER_SIZE); - if (bytes_to_read > 0) { - bytes_read = - this_streamer->input_ring_buffer_->read((void *) output_buffer, bytes_to_read, (10 / portTICK_PERIOD_MS)); - output_buffer_current = output_buffer; - output_buffer_length += bytes_read; - wav_sample_bytes_to_read -= bytes_read; - } - } - } else if (media_file_type == media_player::MediaFileType::MP3) { - // Shift unread data in buffer to start - if ((input_buffer_length > 0) && (input_buffer_length < BUFFER_SIZE)) { - memmove(input_buffer, input_buffer_current, input_buffer_length); - } - input_buffer_current = input_buffer; - - // read in new mp3 data to fill the buffer - size_t bytes_available = this_streamer->input_ring_buffer_->available(); - size_t bytes_to_read = std::min(bytes_available, BUFFER_SIZE - input_buffer_length); - size_t bytes_read = 0; - - if (bytes_to_read > 0) { - uint8_t *new_mp3_data = input_buffer + input_buffer_length; - bytes_read = - this_streamer->input_ring_buffer_->read((void *) new_mp3_data, bytes_to_read, (10 / portTICK_PERIOD_MS)); - - input_buffer_length += bytes_read; - } - - if (input_buffer_length > 0) { - // Look for the next sync word - int32_t offset = MP3FindSyncWord(input_buffer_current, input_buffer_length); - if (offset < 0) { - event.type = EventType::WARNING; - event.err = ESP_ERR_NO_MEM; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - continue; - } - - // Advance read pointer - input_buffer_current += offset; - input_buffer_length -= offset; - - int err = - MP3Decode(mp3_decoder, &input_buffer_current, (int *) &input_buffer_length, (int16_t *) output_buffer, 0); - if (err) { - switch (err) { - case ERR_MP3_MAINDATA_UNDERFLOW: - // Not a problem. Next call to decode will provide more data. 
- continue; - break; - case ERR_MP3_INDATA_UNDERFLOW: - // TODO: Better handle mp3 decoder errors - break; - default: - // TODO: Better handle mp3 decoder errors - break; - } - } else { - MP3GetLastFrameInfo(mp3_decoder, &mp3_frame_info); - if (mp3_frame_info.outputSamps > 0) { - int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8); - output_buffer_length = mp3_frame_info.outputSamps * bytes_per_sample; - output_buffer_current = output_buffer; - - StreamInfo old_stream_info = stream_info; - stream_info.sample_rate = mp3_frame_info.samprate; - stream_info.channels = mp3_frame_info.nChans; - stream_info.bits_per_sample = mp3_frame_info.bitsPerSample; - - if (stream_info != old_stream_info) { - this_streamer->output_ring_buffer_->reset(); - - event.type = EventType::STARTED; - event.media_file_type = media_file_type; - event.stream_info = stream_info; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - }; - } - } - } - } else if (media_file_type == media_player::MediaFileType::FLAC) { - if (!header_parsed) { - if (this_streamer->input_ring_buffer_->available() > 0) { - auto result = flac_decoder.read_header(); - - if (result != flac::FLAC_DECODER_SUCCESS) { - printf("failed to read flac header. 
Error: %d\n", result); - break; - } - - input_buffer_length -= flac_decoder.get_bytes_index(); - input_buffer_current += flac_decoder.get_bytes_index(); - - StreamInfo old_stream_info = stream_info; - - stream_info.channels = flac_decoder.get_num_channels(); - stream_info.sample_rate = flac_decoder.get_sample_rate(); - stream_info.bits_per_sample = flac_decoder.get_sample_depth(); - - if (stream_info != old_stream_info) { - this_streamer->output_ring_buffer_->reset(); - - event.type = EventType::STARTED; - event.media_file_type = media_file_type; - event.stream_info = stream_info; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } - - flac_decoder_output_buffer_min_size = flac_decoder.get_output_buffer_size(); - if (BUFFER_SIZE < flac_decoder_output_buffer_min_size * sizeof(int16_t)) { - printf("output buffer is not big enough"); - break; - } - header_parsed = true; - } - } else { - if (this_streamer->input_ring_buffer_->available() == 0) { - vTaskDelay(10 / portTICK_PERIOD_MS); - } - uint32_t output_samples = 0; - auto result = flac_decoder.decode_frame((int16_t *) output_buffer, &output_samples); - - if (result != flac::FLAC_DECODER_SUCCESS) { - break; - } - - output_buffer_current = output_buffer; - output_buffer_length = output_samples * sizeof(int16_t); - } - } - } - - if (this_streamer->input_ring_buffer_->available() || this_streamer->output_ring_buffer_->available() || - (output_buffer_length > 0) || (input_buffer_length > 0)) { - event.type = EventType::RUNNING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else { - event.type = EventType::IDLE; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } - - if (stopping && (this_streamer->input_ring_buffer_->available() == 0) && - (this_streamer->output_ring_buffer_->available() == 0) && (output_buffer_length == 0) && - (input_buffer_length == 0)) { - break; - } - } - event.type = EventType::STOPPING; - xQueueSend(this_streamer->event_queue_, &event, 
portMAX_DELAY); - - this_streamer->reset_ring_buffers(); - if (media_file_type == media_player::MediaFileType::MP3) { - MP3FreeDecoder(mp3_decoder); - } - flac_decoder.free_buffers(); - allocator.deallocate(input_buffer, BUFFER_SIZE); - allocator.deallocate(output_buffer, BUFFER_SIZE); - - event.type = EventType::STOPPED; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } -} - -void DecodeStreamer::reset_ring_buffers() { - this->input_ring_buffer_->reset(); - this->output_ring_buffer_->reset(); -} - -} // namespace nabu -} // namespace esphome -#endif diff --git a/esphome/components/nabu/decode_streamer.h b/esphome/components/nabu/decode_streamer.h deleted file mode 100644 index 856a889..0000000 --- a/esphome/components/nabu/decode_streamer.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#ifdef USE_ESP_IDF - -#include "streamer.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" -#include "esphome/core/ring_buffer.h" - -#include -#include - -namespace esphome { -namespace nabu { - -class DecodeStreamer : public OutputStreamer { - public: - DecodeStreamer(); - void start(const std::string &task_name, UBaseType_t priority = 1) override; - void reset_ring_buffers() override; - - size_t input_free() { return this->input_ring_buffer_->free(); } - - bool empty() { return (this->input_ring_buffer_->available() + this->output_ring_buffer_->available()) == 0; } - - size_t write(uint8_t *buffer, size_t length); - - protected: - static void decode_task_(void *params); - std::unique_ptr input_ring_buffer_; -}; - -} // namespace nabu -} // namespace esphome - -#endif \ No newline at end of file diff --git a/esphome/components/nabu/flac_decoder.cpp b/esphome/components/nabu/flac_decoder.cpp index df41b78..976ab3d 100644 --- a/esphome/components/nabu/flac_decoder.cpp +++ b/esphome/components/nabu/flac_decoder.cpp @@ -6,18 +6,18 @@ #include "flac_decoder.h" +#include "esp_dsp.h" + namespace flac { 
-FLACDecoderResult FLACDecoder::read_header() { +FLACDecoderResult FLACDecoder::read_header(size_t buffer_length) { + this->buffer_index_ = 0; + this->bytes_left_ = buffer_length; + if (this->out_of_data_) { return FLAC_DECODER_ERROR_OUT_OF_DATA; } - if (this->bytes_left_ < this->min_buffer_size_) { - // Refill the buffer before reading the header - this->fill_buffer(); - } - // File must start with 'fLaC' if (this->read_uint(32) != FLAC_MAGIC_NUMBER) { return FLAC_DECODER_ERROR_BAD_MAGIC_NUMBER; @@ -70,12 +70,14 @@ FLACDecoderResult FLACDecoder::read_header() { return FLAC_DECODER_SUCCESS; } // read_header -FLACDecoderResult FLACDecoder::decode_frame(int16_t *output_buffer, uint32_t *num_samples) { +FLACDecoderResult FLACDecoder::decode_frame(size_t buffer_length, int16_t *output_buffer, uint32_t *num_samples) { + this->buffer_index_ = 0; + this->bytes_left_ = buffer_length; *num_samples = 0; if (!this->block_samples_) { // freed in free_buffers() - esphome::ExternalRAMAllocator allocator(esphome::ExternalRAMAllocator::ALLOW_FAILURE); + esphome::ExternalRAMAllocator allocator(esphome::ExternalRAMAllocator::ALLOW_FAILURE); this->block_samples_ = allocator.allocate(this->max_block_size_ * this->num_channels_); } @@ -161,7 +163,7 @@ FLACDecoderResult FLACDecoder::decode_frame(int16_t *output_buffer, uint32_t *nu void FLACDecoder::free_buffers() { if (this->block_samples_) { // delete this->block_samples_; - esphome::ExternalRAMAllocator allocator(esphome::ExternalRAMAllocator::ALLOW_FAILURE); + esphome::ExternalRAMAllocator allocator(esphome::ExternalRAMAllocator::ALLOW_FAILURE); allocator.deallocate(this->block_samples_, this->max_block_size_ * this->num_channels_); this->block_samples_ = nullptr; } @@ -170,37 +172,6 @@ void FLACDecoder::free_buffers() { this->block_result_.shrink_to_fit(); } // free_buffers -std::size_t FLACDecoder::fill_buffer() { - if (this->bytes_left_ > 0) { - memmove(this->buffer_, this->buffer_ + this->buffer_index_, this->bytes_left_); - } - 
- uint8_t *new_buffer_data = this->buffer_ + this->bytes_left_; - - // TODO: This is hacky... we don't want to keep delaying every time fill_buffer is called if the file is done - // otherwise the decoder task will take a long time to end despite the media being finished - // if ((this->input_ring_buffer_->available() == 0) && (this->unsuccessful_read_count_ < 10)) { - // vTaskDelay(10 / portTICK_PERIOD_MS); - // } - - std::size_t bytes_read = 0; - if (this->input_ring_buffer_->available() > 0) { - bytes_read = this->input_ring_buffer_->read((void *) new_buffer_data, this->buffer_size_ - this->bytes_left_, - 0 / portTICK_PERIOD_MS); - } - - // if (bytes_read > 0) { - // this->unsuccessful_read_count_ = 0; - // } else if (this->unsuccessful_read_count_ < 10) { - // ++this->unsuccessful_read_count_; - // } - - this->buffer_index_ = 0; - this->bytes_left_ += bytes_read; - - return bytes_read; -} // fill_buffer - FLACDecoderResult FLACDecoder::decode_subframes(uint32_t block_size, uint32_t sample_depth, uint32_t channel_assignment) { FLACDecoderResult result = FLAC_DECODER_SUCCESS; @@ -332,10 +303,12 @@ FLACDecoderResult FLACDecoder::decode_lpc_subframe(uint32_t block_size, std::siz uint32_t precision = this->read_uint(4) + 1; int32_t shift = this->read_sint(5); - std::vector coefs; + std::vector coefs; + coefs.resize(lpc_order + 1); for (std::size_t i = 0; i < lpc_order; i++) { - coefs.push_back(this->read_sint(precision)); + coefs[lpc_order - i - 1] = this->read_sint(precision); } + coefs[lpc_order] = 1 << shift; result = decode_residuals(block_size); if (result != FLAC_DECODER_SUCCESS) { @@ -388,21 +361,23 @@ FLACDecoderResult FLACDecoder::decode_residuals(uint32_t block_size) { return FLAC_DECODER_SUCCESS; } // decode_residuals -void FLACDecoder::restore_linear_prediction(const std::vector &coefs, int32_t shift) { - for (std::size_t i = coefs.size(); i < this->block_result_.size(); i++) { - int32_t sum = 0; - for (std::size_t j = 0; j < coefs.size(); j++) { - sum 
+= (this->block_result_[i - 1 - j] * coefs[j]); - } - this->block_result_[i] += (sum >> shift); - } +void FLACDecoder::restore_linear_prediction(const std::vector &coefs, int32_t shift) { + // The esp-dsp dot product code has a round factor built-in that can cause small differences (at most 1) in the + // decoding compared to the original implementation + for (std::size_t i = 0; i < this->block_result_.size() - coefs.size() + 1; i++) { + dsps_dotprod_s16_ae32(&this->block_result_.data()[i], coefs.data(), + &this->block_result_.data()[i + coefs.size() - 1], coefs.size(), 15 - shift); + } + // for (std::size_t i = coefs.size(); i < this->block_result_.size(); i++) { + // int32_t sum = 0; + // for (std::size_t j = 0; j < coefs.size(); j++) { + // sum += (this->block_result_[i - 1 - j] * coefs[j]); + // } + // this->block_result_[i] += (sum >> shift); + // } } // restore_linear_prediction uint32_t FLACDecoder::read_uint(std::size_t num_bits) { - if (this->bytes_left_ < this->min_buffer_size_) { - this->fill_buffer(); - } - if (this->bytes_left_ == 0) { this->out_of_data_ = true; return 0; diff --git a/esphome/components/nabu/flac_decoder.h b/esphome/components/nabu/flac_decoder.h index b831d3e..0d050ad 100644 --- a/esphome/components/nabu/flac_decoder.h +++ b/esphome/components/nabu/flac_decoder.h @@ -19,12 +19,10 @@ namespace flac { const static uint32_t FLAC_MAGIC_NUMBER = 0x664C6143; const static uint32_t FLAC_UINT_MASK[] = { - 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, - 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, - 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, - 0x0003ffff, 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, - 0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, - 0x3fffffff, 0x7fffffff, 0xffffffff}; + 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, + 0x000001ff, 0x000003ff, 
0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, + 0x0003ffff, 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, 0x01ffffff, 0x03ffffff, + 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff}; enum FLACDecoderResult { FLAC_DECODER_SUCCESS = 0, @@ -42,39 +40,31 @@ enum FLACDecoderResult { }; // Coefficients for fixed linear prediction -const static std::vector FLAC_FIXED_COEFFICIENTS[] = {{}, - { - 1, - }, - {2, -1}, - {3, -3, 1}, - {4, -6, 4, -1}}; +const static std::vector FLAC_FIXED_COEFFICIENTS[] = { + {1}, {1, 1}, {-1, 2, 1}, {1, -3, 3, 1}, {-1, 4, -6, 4, 1}}; /* Basic FLAC decoder ported from: * https://www.nayuki.io/res/simple-flac-implementation/simple-decode-flac-to-wav.py */ class FLACDecoder { - -public: + public: /* buffer - FLAC data * buffer_size - size of the data buffer * min_buffer_size - min bytes in buffer before fill_buffer is called */ - FLACDecoder(uint8_t *buffer, const std::size_t buffer_size, const std::size_t min_buffer_size, - esphome::RingBuffer *input_ring_buffer) - : buffer_(buffer), buffer_size_(buffer_size), - min_buffer_size_(min_buffer_size), input_ring_buffer_(input_ring_buffer) {} + FLACDecoder(uint8_t *buffer) + : buffer_(buffer) {} ~FLACDecoder() { this->free_buffers(); } /* Reads FLAC header from buffer. * Must be called before decode_frame. */ - FLACDecoderResult read_header(); + FLACDecoderResult read_header(size_t buffer_length); /* Decodes a single frame of audio. * Copies num_samples into output_buffer. * Use get_output_buffer_size() to allocate output_buffer. */ - FLACDecoderResult decode_frame(int16_t *output_buffer, uint32_t *num_samples); + FLACDecoderResult decode_frame(size_t buffer_length, int16_t *output_buffer, uint32_t *num_samples); /* Frees internal memory. 
*/ void free_buffers(); @@ -92,43 +82,33 @@ class FLACDecoder { uint32_t get_num_samples() { return this->num_samples_; } /* Maximum number of output samples per frame (after read_header()) */ - uint32_t get_output_buffer_size() { - return this->max_block_size_ * this->num_channels_; - } + uint32_t get_output_buffer_size() { return this->max_block_size_ * this->num_channels_; } std::size_t get_bytes_index() { return this->buffer_index_; } + /* Number of unread bytes in the input buffer. */ + std::size_t get_bytes_left() { return this->bytes_left_; } -protected: - /* Fills the input buffer, moving unused chunk to front. */ - std::size_t fill_buffer(); - + protected: /* Decodes one or more subframes by type. */ - FLACDecoderResult decode_subframes(uint32_t block_size, uint32_t sample_depth, - uint32_t channel_assignment); + FLACDecoderResult decode_subframes(uint32_t block_size, uint32_t sample_depth, uint32_t channel_assignment); /* Decodes a subframe by type. */ - FLACDecoderResult decode_subframe(uint32_t block_size, uint32_t sample_depth, - std::size_t block_samples_offset); + FLACDecoderResult decode_subframe(uint32_t block_size, uint32_t sample_depth, std::size_t block_samples_offset); /* Decodes a subframe with fixed coefficients. */ - FLACDecoderResult decode_fixed_subframe(uint32_t block_size, - std::size_t block_samples_offset, - uint32_t pre_order, + FLACDecoderResult decode_fixed_subframe(uint32_t block_size, std::size_t block_samples_offset, uint32_t pre_order, uint32_t sample_depth); /* Decodes a subframe with dynamic coefficients. */ - FLACDecoderResult decode_lpc_subframe(uint32_t block_size, - std::size_t block_samples_offset, - uint32_t lpc_order, + FLACDecoderResult decode_lpc_subframe(uint32_t block_size, std::size_t block_samples_offset, uint32_t lpc_order, uint32_t sample_depth); /* Decodes prediction residuals. */ FLACDecoderResult decode_residuals(uint32_t block_size); /* Completes predicted samples. 
*/ - void restore_linear_prediction(const std::vector &coefs, - int32_t shift); + void restore_linear_prediction(const std::vector &coefs, int32_t shift); /* Reads an unsigned integer of arbitrary bit size. */ uint32_t read_uint(std::size_t num_bits); @@ -139,23 +119,13 @@ class FLACDecoder { /* Reads a rice-encoded signed integer. */ int64_t read_rice_sint(uint8_t param); - /* Number of unread bytes in the input buffer. */ - std::size_t get_bytes_left() { return this->bytes_left_; } - - /* Forces input buffer to be byte-aligned. */ void align_to_byte(); -private: + private: /* Pointer to input buffer with FLAC data. */ uint8_t *buffer_ = nullptr; - /* Size of the input buffer in bytes. */ - const std::size_t buffer_size_; - - /* Minimum bytes in the input buffer before fill_buffer() is called. */ - const std::size_t min_buffer_size_; - /* Next index to read from the input buffer. */ std::size_t buffer_index_ = 0; @@ -193,14 +163,12 @@ class FLACDecoder { uint32_t num_samples_ = 0; /* Buffer of decoded samples at full precision (all channels). */ - int32_t *block_samples_ = nullptr; + int16_t *block_samples_ = nullptr; /* Buffer of decoded samples at full precision (single channel). 
*/ - std::vector> block_result_; - - esphome::RingBuffer *input_ring_buffer_; + std::vector> block_result_; }; -} // namespace flac +} // namespace flac #endif diff --git a/esphome/components/nabu/media_player.py b/esphome/components/nabu/media_player.py index 00565dd..fb58299 100644 --- a/esphome/components/nabu/media_player.py +++ b/esphome/components/nabu/media_player.py @@ -188,7 +188,8 @@ async def to_code(config): esp32.add_idf_component( name="esp-dsp", repo="https://github.com/kahrendt/esp-dsp", - ref="filename-fix", + ref="no-round-dot-product", + # ref="filename-fix", # repo="https://github.com/espressif/esp-dsp", # ref="v1.3.0", ) diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 1a9424d..8ec1dfb 100644 --- a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -15,23 +15,10 @@ namespace nabu { // TODO: // - Tune task memory requirements and potentially buffer sizes if issues appear -// - The various tasks are not uniform in their running/idle states meaning. Be consistent! -// - Determine the best place to yield in each task... it's inconsistent -// - Be careful of different task priorities... for example, the speaker task had issues yielding unless the delay -// was in the command queue receiving part -// - This showed up when I removed the "IDLE" and "RUNNING" task messages, caused WDT -// - Probably best to delay at the reading ring buffer stages... but this could also prevent necessary yielding -// while streaming -// - Ensure buffers are fuller before starting to stream media (especially with the resampler active) to avoid -// initial stuttering -// - Using lots of internal memory... 
the decoder streamer class can be optimized to avoid loading -// unnecessary parts (look at the mp3 decoder in particular) // - Biquad filters work for downsampling without handling float buffer carefully, upsampling will require some care // - Ducking improvements // - Ducking ratio probably isn't the best way to specify, as volume perception is not linear // - Add a YAML action for setting the ducking level instead of requiring a lambda -// - Verify ring buffers are reset in a safe way (only tasks that read should reset it?) -// - Eliminate code redundancy for start of media playback (url and file based) // - Clean up process around playing back local media files // - Create a registry of media files in Python // - Add a yaml action to play a specific media file @@ -161,7 +148,7 @@ static void stats_task(void *arg) { static const char *const TAG = "nabu_media_player"; void NabuMediaPlayer::setup() { - // xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); + xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); state = media_player::MEDIA_PLAYER_STATE_IDLE; @@ -170,8 +157,8 @@ void NabuMediaPlayer::setup() { this->speaker_command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent)); this->speaker_event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent)); - this->combine_streamer_ = make_unique(); - this->combine_streamer_->start("mixer"); + this->audio_mixer_ = make_unique(); + this->audio_mixer_->start("mixer", 10); if (!this->parent_->try_lock()) { this->mark_failed(); @@ -182,6 +169,16 @@ void NabuMediaPlayer::setup() { this->get_dac_volume_(); + // if (!this->write_byte(DAC_PAGE_SELECTION_REGISTER, 0x01)) { + // ESP_LOGE(TAG, "DAC failed to switch register page"); + // return; + // } + + // if (!this->write_byte(0x7B, 0b00000001)) { // 40 ms reference start up time on page 1, didn't help + // // if (!this->write_byte(0x40, 0b00010000)) { // auto mute... 
+ // return; + // } + ESP_LOGI(TAG, "Set up nabu media player"); } @@ -279,7 +276,7 @@ void NabuMediaPlayer::speaker_task(void *params) { xQueueSend(this_speaker->speaker_event_queue_, &event, portMAX_DELAY); while (true) { - if (xQueueReceive(this_speaker->speaker_command_queue_, &command_event, (10 / portTICK_PERIOD_MS)) == pdTRUE) { + if (xQueueReceive(this_speaker->speaker_command_queue_, &command_event, (5 / portTICK_PERIOD_MS)) == pdTRUE) { if (command_event.command == CommandEventType::STOP) { // Stop signal from main thread break; @@ -295,8 +292,7 @@ void NabuMediaPlayer::speaker_task(void *params) { size_t bytes_read = 0; - bytes_read = - this_speaker->combine_streamer_->read((uint8_t *) buffer, bytes_to_read, (delay_ms / portTICK_PERIOD_MS)); + bytes_read = this_speaker->audio_mixer_->read((uint8_t *) buffer, bytes_to_read, (delay_ms / portTICK_PERIOD_MS)); if (bytes_read > 0) { size_t bytes_written; @@ -348,50 +344,23 @@ void NabuMediaPlayer::watch_media_commands_() { if (media_command.announce.has_value() && media_command.announce.value()) { if (this->announcement_pipeline_ == nullptr) { this->announcement_pipeline_ = - make_unique(this->combine_streamer_.get(), PipelineType::ANNOUNCEMENT); + make_unique(this->audio_mixer_.get(), AudioPipelineType::ANNOUNCEMENT); } - if ((this->announcement_pipeline_state_ != PipelineState::STOPPED) && - (this->announcement_pipeline_state_ != PipelineState::STOPPING)) { - command_event.command = CommandEventType::STOP; - this->announcement_pipeline_->send_command(&command_event); - } - - this->cancel_retry("ann_start"); - this->set_retry("ann_start", 20, 3, [this](uint8_t attempts_left) -> RetryResult { - if (this->announcement_pipeline_state_ != PipelineState::STOPPED) { - return RetryResult::RETRY; - } - - this->announcement_pipeline_->start(this->announcement_url_.value(), "ann_pipe"); - return RetryResult::DONE; - }); + this->announcement_pipeline_->start(this->announcement_url_.value(), "ann", 7); } else { if 
(this->media_pipeline_ == nullptr) { - this->media_pipeline_ = make_unique(this->combine_streamer_.get(), PipelineType::MEDIA); + this->media_pipeline_ = make_unique(this->audio_mixer_.get(), AudioPipelineType::MEDIA); } - if ((this->media_pipeline_state_ != PipelineState::STOPPED) && - (this->media_pipeline_state_ != PipelineState::STOPPING)) { - command_event.command = CommandEventType::STOP; - this->media_pipeline_->send_command(&command_event); - } - - this->cancel_retry("media_start"); - this->set_retry("media_start", 60, 3, [this](uint8_t attempts_left) -> RetryResult { - if (this->media_pipeline_state_ != PipelineState::STOPPED) { - return RetryResult::RETRY; - } + this->media_pipeline_->start(this->media_url_.value(), "media", 2); - this->media_pipeline_->start(this->media_url_.value(), "med_pipe"); - if (this->is_paused_) { - CommandEvent command_event; - command_event.command = CommandEventType::RESUME_MEDIA; - this->combine_streamer_->send_command(&command_event); - } - this->is_paused_ = false; - return RetryResult::DONE; - }); + if (this->is_paused_) { + CommandEvent command_event; + command_event.command = CommandEventType::RESUME_MEDIA; + this->audio_mixer_->send_command(&command_event); + } + this->is_paused_ = false; } } @@ -399,50 +368,23 @@ void NabuMediaPlayer::watch_media_commands_() { if (media_command.announce.has_value() && media_command.announce.value()) { if (this->announcement_pipeline_ == nullptr) { this->announcement_pipeline_ = - make_unique(this->combine_streamer_.get(), PipelineType::ANNOUNCEMENT); + make_unique(this->audio_mixer_.get(), AudioPipelineType::ANNOUNCEMENT); } - if ((this->announcement_pipeline_state_ != PipelineState::STOPPED) && - (this->announcement_pipeline_state_ != PipelineState::STOPPING)) { - command_event.command = CommandEventType::STOP; - this->announcement_pipeline_->send_command(&command_event); - } - - this->cancel_retry("ann_start"); - this->set_retry("ann_start", 20, 3, [this](uint8_t attempts_left) -> 
RetryResult { - if (this->announcement_pipeline_state_ != PipelineState::STOPPED) { - return RetryResult::RETRY; - } - - this->announcement_pipeline_->start(this->announcement_file_.value(), "ann_pipe"); - return RetryResult::DONE; - }); + this->announcement_pipeline_->start(this->announcement_file_.value(), "ann", 7); } else { if (this->media_pipeline_ == nullptr) { - this->media_pipeline_ = make_unique(this->combine_streamer_.get(), PipelineType::MEDIA); + this->media_pipeline_ = make_unique(this->audio_mixer_.get(), AudioPipelineType::MEDIA); } - if ((this->media_pipeline_state_ != PipelineState::STOPPED) && - (this->media_pipeline_state_ != PipelineState::STOPPING)) { - command_event.command = CommandEventType::STOP; - this->media_pipeline_->send_command(&command_event); - } - - this->cancel_retry("media_start"); - this->set_retry("media_start", 60, 3, [this](uint8_t attempts_left) -> RetryResult { - if (this->media_pipeline_state_ != PipelineState::STOPPED) { - return RetryResult::RETRY; - } + this->media_pipeline_->start(this->media_file_.value(), "media", 2); - this->media_pipeline_->start(this->media_file_.value(), "med_pipe"); - if (this->is_paused_) { - CommandEvent command_event; - command_event.command = CommandEventType::RESUME_MEDIA; - this->combine_streamer_->send_command(&command_event); - } - this->is_paused_ = false; - return RetryResult::DONE; - }); + if (this->is_paused_) { + CommandEvent command_event; + command_event.command = CommandEventType::RESUME_MEDIA; + this->audio_mixer_->send_command(&command_event); + } + this->is_paused_ = false; } } @@ -458,34 +400,34 @@ void NabuMediaPlayer::watch_media_commands_() { case media_player::MEDIA_PLAYER_COMMAND_PLAY: if (this->is_paused_) { command_event.command = CommandEventType::RESUME_MEDIA; - this->combine_streamer_->send_command(&command_event); + this->audio_mixer_->send_command(&command_event); } this->is_paused_ = false; break; case media_player::MEDIA_PLAYER_COMMAND_PAUSE: - if 
(this->media_pipeline_state_ == PipelineState::PLAYING) { + if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) { command_event.command = CommandEventType::PAUSE_MEDIA; - this->combine_streamer_->send_command(&command_event); + this->audio_mixer_->send_command(&command_event); } this->is_paused_ = true; break; case media_player::MEDIA_PLAYER_COMMAND_STOP: command_event.command = CommandEventType::STOP; if (media_command.announce.has_value() && media_command.announce.value()) { - this->announcement_pipeline_->send_command(&command_event, (10 / portTICK_PERIOD_MS)); + this->announcement_pipeline_->stop(); } else { - this->media_pipeline_->send_command(&command_event); + this->media_pipeline_->stop(); this->is_paused_ = false; } break; case media_player::MEDIA_PLAYER_COMMAND_TOGGLE: if (this->is_paused_) { command_event.command = CommandEventType::RESUME_MEDIA; - this->combine_streamer_->send_command(&command_event); + this->audio_mixer_->send_command(&command_event); this->is_paused_ = false; } else { command_event.command = CommandEventType::PAUSE_MEDIA; - this->combine_streamer_->send_command(&command_event); + this->audio_mixer_->send_command(&command_event); this->is_paused_ = true; } break; @@ -550,82 +492,10 @@ void NabuMediaPlayer::watch_speaker_() { } } -// TODO: Reduce code redundancy void NabuMediaPlayer::watch_() { TaskEvent event; - - if (this->announcement_pipeline_ != nullptr) { - while (this->announcement_pipeline_->read_event(&event)) { - switch (event.type) { - case EventType::STARTING: - this->announcement_pipeline_state_ = PipelineState::STARTING; - ESP_LOGD(TAG, "Starting Announcement Playback"); - break; - case EventType::STARTED: - ESP_LOGD(TAG, "Started Announcement Playback"); - this->announcement_pipeline_state_ = PipelineState::STARTED; - break; - case EventType::IDLE: - this->announcement_pipeline_state_ = PipelineState::PLAYING; - break; - case EventType::RUNNING: - this->announcement_pipeline_state_ = PipelineState::PLAYING; - 
this->status_clear_warning(); - break; - case EventType::STOPPING: - ESP_LOGD(TAG, "Stopping Announcement Playback"); - this->announcement_pipeline_state_ = PipelineState::STOPPING; - break; - case EventType::STOPPED: { - this->announcement_pipeline_->stop(); - ESP_LOGD(TAG, "Stopped Announcement Playback"); - this->announcement_pipeline_state_ = PipelineState::STOPPED; - break; - } - case EventType::WARNING: - ESP_LOGW(TAG, "Error reading announcement: %s", esp_err_to_name(event.err)); - this->status_set_warning(esp_err_to_name(event.err)); - break; - } - } - } - - if (this->media_pipeline_ != nullptr) { - while (this->media_pipeline_->read_event(&event)) { - switch (event.type) { - case EventType::STARTING: - ESP_LOGD(TAG, "Starting Media Playback"); - this->media_pipeline_state_ = PipelineState::STARTING; - break; - case EventType::STARTED: - ESP_LOGD(TAG, "Started Media Playback"); - this->media_pipeline_state_ = PipelineState::STARTED; - break; - case EventType::IDLE: - this->media_pipeline_state_ = PipelineState::PLAYING; - break; - case EventType::RUNNING: - this->media_pipeline_state_ = PipelineState::PLAYING; - this->status_clear_warning(); - break; - case EventType::STOPPING: - this->media_pipeline_state_ = PipelineState::STOPPING; - ESP_LOGD(TAG, "Stopping Media Playback"); - break; - case EventType::STOPPED: - this->media_pipeline_->stop(); - this->media_pipeline_state_ = PipelineState::STOPPED; - ESP_LOGD(TAG, "Stopped Media Playback"); - break; - case EventType::WARNING: - ESP_LOGW(TAG, "Error reading media: %s", esp_err_to_name(event.err)); - this->status_set_warning(esp_err_to_name(event.err)); - break; - } - } - } - if (this->combine_streamer_ != nullptr) { - while (this->combine_streamer_->read_event(&event)) + if (this->audio_mixer_ != nullptr) { + while (this->audio_mixer_->read_event(&event)) ; } } @@ -638,31 +508,38 @@ void NabuMediaPlayer::loop() { // Determine state of the media player media_player::MediaPlayerState old_state = this->state; 
- if ((this->announcement_pipeline_state_ != PipelineState::STOPPING) && - (this->announcement_pipeline_state_ != PipelineState::STOPPED)) { + if (this->announcement_pipeline_ != nullptr) + this->announcement_pipeline_state_ = this->announcement_pipeline_->get_state(); + + if (this->media_pipeline_ != nullptr) + this->media_pipeline_state_ = this->media_pipeline_->get_state(); + + if ((this->announcement_pipeline_state_ != AudioPipelineState::STOPPING) && + (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED)) { this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING; if (this->is_idle_muted_ && !this->is_muted_) { - this->unmute_(); + // this->unmute_(); this->is_idle_muted_ = false; + ESP_LOGD(TAG, "Unmuting as output is not idle"); } } else { if (this->is_paused_) { this->state = media_player::MEDIA_PLAYER_STATE_PAUSED; if (!this->is_idle_muted_) { - this->mute_(); + // this->mute_(); this->is_idle_muted_ = true; } - } else if ((this->media_pipeline_state_ == PipelineState::STOPPING) || - (this->media_pipeline_state_ == PipelineState::STOPPED)) { + } else if ((this->media_pipeline_state_ == AudioPipelineState::STOPPING) || + (this->media_pipeline_state_ == AudioPipelineState::STOPPED)) { this->state = media_player::MEDIA_PLAYER_STATE_IDLE; if (!this->is_idle_muted_) { - this->mute_(); + // this->mute_(); this->is_idle_muted_ = true; } } else { this->state = media_player::MEDIA_PLAYER_STATE_PLAYING; if (this->is_idle_muted_ && !this->is_muted_) { - this->unmute_(); + // this->unmute_(); this->is_idle_muted_ = false; } } @@ -674,11 +551,11 @@ void NabuMediaPlayer::loop() { } void NabuMediaPlayer::set_ducking_ratio(float ducking_ratio) { - if (this->combine_streamer_ != nullptr) { + if (this->audio_mixer_ != nullptr) { CommandEvent command_event; command_event.command = CommandEventType::DUCK; command_event.ducking_ratio = ducking_ratio; - this->combine_streamer_->send_command(&command_event); + this->audio_mixer_->send_command(&command_event); } } 
@@ -717,14 +594,16 @@ void NabuMediaPlayer::control(const media_player::MediaPlayerCall &call) { if (call.get_volume().has_value()) { media_command.volume = call.get_volume().value(); - xQueueSend(this->media_control_command_queue_, &media_command, 0); // Wait 0 ticks for queue to be free, volume sets aren't that important! + // Wait 0 ticks for queue to be free, volume sets aren't that important! + xQueueSend(this->media_control_command_queue_, &media_command, 0); return; } if (call.get_command().has_value()) { media_command.command = call.get_command().value(); TickType_t ticks_to_wait = portMAX_DELAY; - if ((call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP) || (call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN)) { + if ((call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP) || + (call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN)) { ticks_to_wait = 0; // Wait 0 ticks for queue to be free, volume sets aren't that important! 
} xQueueSend(this->media_control_command_queue_, &media_command, ticks_to_wait); diff --git a/esphome/components/nabu/nabu_media_player.h b/esphome/components/nabu/nabu_media_player.h index a21e24b..4ff4260 100644 --- a/esphome/components/nabu/nabu_media_player.h +++ b/esphome/components/nabu/nabu_media_player.h @@ -7,8 +7,8 @@ #include "esphome/core/component.h" #include "esphome/core/ring_buffer.h" -#include "streamer.h" -#include "pipeline.h" +#include "audio_mixer.h" +#include "audio_pipeline.h" #include #include @@ -32,15 +32,6 @@ static const uint8_t DAC_MUTE_PAGE = 0x01; static const uint8_t DAC_MUTE_COMMAND = 0x40; static const uint8_t DAC_UNMUTE_COMMAND = 0x00; -enum class PipelineState : uint8_t { - STARTING, - STARTED, - PLAYING, - PAUSED, - STOPPING, - STOPPED, -}; - struct MediaCallCommand { optional command; optional volume; @@ -49,11 +40,6 @@ struct MediaCallCommand { optional new_file; }; -// struct MediaFile { -// const uint8_t *data; -// MediaFileType file_type; -// }; - class NabuMediaPlayer : public Component, public media_player::MediaPlayer, public i2s_audio::I2SAudioOut, @@ -95,17 +81,18 @@ class NabuMediaPlayer : public Component, optional announcement_file_{}; // only modified by control fucntion QueueHandle_t media_control_command_queue_; - // Reads commands from media_control_command_queue_. Starts pipelines and mixer if necessary. Writes to the pipeline - // command queues + // Reads commands from media_control_command_queue_. Starts pipelines and mixer if necessary. void watch_media_commands_(); - std::unique_ptr media_pipeline_; - std::unique_ptr announcement_pipeline_; - std::unique_ptr combine_streamer_; - // Monitors the pipelines' and mixer's event queues. 
Only function that modifies pipeline_state_ variables + std::unique_ptr media_pipeline_; + std::unique_ptr announcement_pipeline_; + std::unique_ptr audio_mixer_; + + // Monitors the mixer task void watch_(); - PipelineState media_pipeline_state_{PipelineState::STOPPED}; - PipelineState announcement_pipeline_state_{PipelineState::STOPPED}; + + AudioPipelineState media_pipeline_state_{AudioPipelineState::STOPPED}; + AudioPipelineState announcement_pipeline_state_{AudioPipelineState::STOPPED}; void watch_speaker_(); static void speaker_task(void *params); diff --git a/esphome/components/nabu/pipeline.cpp b/esphome/components/nabu/pipeline.cpp deleted file mode 100644 index d18b11f..0000000 --- a/esphome/components/nabu/pipeline.cpp +++ /dev/null @@ -1,287 +0,0 @@ -#ifdef USE_ESP_IDF - -#include "esphome/core/helpers.h" -#include "pipeline.h" - -namespace esphome { -namespace nabu { - -static const size_t BUFFER_SIZE = 32768 * sizeof(int16_t); // Audio samples - -static const size_t QUEUE_COUNT = 10; - -Pipeline::Pipeline(CombineStreamer *mixer, PipelineType pipeline_type) { - this->reader_ = make_unique(); - this->decoder_ = make_unique(); - this->resampler_ = make_unique(); - - this->event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent)); - this->command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent)); - - this->mixer_ = mixer; - this->pipeline_type_ = pipeline_type; -} - -size_t Pipeline::read(uint8_t *buffer, size_t length) { - size_t available_bytes = this->available(); - size_t bytes_to_read = std::min(length, available_bytes); - if (bytes_to_read > 0) { - return this->decoder_->read(buffer, bytes_to_read); - } - return 0; -} - -void Pipeline::start(const std::string &uri, const std::string &task_name, UBaseType_t priority) { - this->reader_->start(uri, task_name + "_reader"); - this->decoder_->start(task_name + "_decoder"); - this->resampler_->start(task_name + "_resampler"); - if (this->task_handle_ == nullptr) { - 
xTaskCreate(Pipeline::transfer_task_, task_name.c_str(), 8096, (void *) this, priority, &this->task_handle_); - } -} - -void Pipeline::start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority) { - this->reader_->start(media_file, task_name + "_reader"); - this->decoder_->start(task_name + "_decoder"); - this->resampler_->start(task_name + "_resampler"); - if (this->task_handle_ == nullptr) { - xTaskCreate(Pipeline::transfer_task_, task_name.c_str(), 8096, (void *) this, priority, &this->task_handle_); - } -} -void Pipeline::stop() { - vTaskDelete(this->task_handle_); - this->task_handle_ = nullptr; - - xQueueReset(this->event_queue_); - xQueueReset(this->command_queue_); -} - -BaseType_t Pipeline::send_command(CommandEvent *command, TickType_t ticks_to_wait) { - return xQueueSend(this->command_queue_, command, ticks_to_wait); -} - -BaseType_t Pipeline::read_event(TaskEvent *event, TickType_t ticks_to_wait) { - return xQueueReceive(this->event_queue_, event, ticks_to_wait); -} - -void Pipeline::transfer_task_(void *params) { - Pipeline *this_pipeline = (Pipeline *) params; - - TaskEvent event; - CommandEvent command_event; - - event.type = EventType::STARTING; - event.err = ESP_OK; - xQueueSend(this_pipeline->event_queue_, &event, portMAX_DELAY); - - ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); - uint8_t *transfer_buffer = allocator.allocate(BUFFER_SIZE); - if (transfer_buffer == nullptr) { - event.type = EventType::WARNING; - event.err = ESP_ERR_NO_MEM; - xQueueSend(this_pipeline->event_queue_, &event, portMAX_DELAY); - - event.type = EventType::STOPPED; - event.err = ESP_OK; - xQueueSend(this_pipeline->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } - - return; - } - - event.type = EventType::STARTED; - xQueueSend(this_pipeline->event_queue_, &event, portMAX_DELAY); - - bool stopping_gracefully = true; - - this_pipeline->reading_ = true; - this_pipeline->decoding_ = true; - 
this_pipeline->resampling_ = true; - - while (true) { - if (xQueueReceive(this_pipeline->command_queue_, &command_event, (10 / portTICK_PERIOD_MS)) == pdTRUE) { - if (command_event.command == CommandEventType::START) { - this_pipeline->reader_->send_command(&command_event); - } else if (command_event.command == CommandEventType::STOP) { - this_pipeline->reader_->send_command(&command_event); - this_pipeline->decoder_->send_command(&command_event); - this_pipeline->resampler_->send_command(&command_event); - stopping_gracefully = false; - } else if (command_event.command == CommandEventType::STOP_GRACEFULLY) { - this_pipeline->reader_->send_command(&command_event); - stopping_gracefully = true; - } - } - - size_t bytes_to_read = 0; - size_t bytes_read = 0; - size_t bytes_written = 0; - - // Move data from resampler to the mixer - if (this_pipeline->pipeline_type_ == PipelineType::MEDIA) { - bytes_to_read = std::min(this_pipeline->mixer_->media_free(), BUFFER_SIZE); - bytes_read = this_pipeline->resampler_->read(transfer_buffer, bytes_to_read); - bytes_written += this_pipeline->mixer_->write_media(transfer_buffer, bytes_read); - } else if (this_pipeline->pipeline_type_ == PipelineType::ANNOUNCEMENT) { - bytes_to_read = std::min(this_pipeline->mixer_->announcement_free(), BUFFER_SIZE); - bytes_read = this_pipeline->resampler_->read(transfer_buffer, bytes_to_read); - bytes_written += this_pipeline->mixer_->write_announcement(transfer_buffer, bytes_read); - } - - // Move data from decoder to resampler - bytes_to_read = std::min(this_pipeline->resampler_->input_free(), BUFFER_SIZE); - bytes_read = this_pipeline->decoder_->read(transfer_buffer, bytes_to_read); - bytes_written = this_pipeline->resampler_->write(transfer_buffer, bytes_read); - - // Move data from http reader to decoder - bytes_to_read = std::min(this_pipeline->decoder_->input_free(), BUFFER_SIZE); - bytes_read = this_pipeline->reader_->read(transfer_buffer, bytes_to_read); - bytes_written = 
this_pipeline->decoder_->write(transfer_buffer, bytes_read); - - this_pipeline->watch_(stopping_gracefully); - - if (!this_pipeline->reading_ && !this_pipeline->decoding_ && !this_pipeline->resampling_) { - break; - } - } - - event.type = EventType::STOPPING; - xQueueSend(this_pipeline->event_queue_, &event, portMAX_DELAY); - - allocator.deallocate(transfer_buffer, BUFFER_SIZE); - - event.type = EventType::STOPPED; - xQueueSend(this_pipeline->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } -} - -void Pipeline::watch_(bool stopping_gracefully) { - TaskEvent event; - CommandEvent command_event; - - while (this->reader_->read_event(&event)) { - switch (event.type) { - case EventType::STARTING: - this->reading_ = true; - break; - case EventType::STARTED: - this->reading_ = true; - command_event.command = CommandEventType::START; - command_event.media_file_type = event.media_file_type; - this->decoder_->send_command(&command_event); - break; - case EventType::IDLE: - this->reading_ = true; - break; - case EventType::RUNNING: - this->reading_ = true; - break; - case EventType::STOPPING: - this->reading_ = true; - break; - case EventType::STOPPED: { - if (stopping_gracefully) { - command_event.command = CommandEventType::STOP_GRACEFULLY; - this->decoder_->send_command(&command_event); - } - this->reader_->stop(); - this->reading_ = false; - break; - } - case EventType::WARNING: - this->reading_ = false; - xQueueSend(this->event_queue_, &event, portMAX_DELAY); - break; - } - } - - while (this->decoder_->read_event(&event)) { - switch (event.type) { - case EventType::STARTING: - this->decoding_ = true; - break; - case EventType::STARTED: - this->decoding_ = true; - command_event.command = CommandEventType::START; - command_event.media_file_type = event.media_file_type; - command_event.stream_info = event.stream_info; - this->resampler_->send_command(&command_event); - break; - case EventType::IDLE: - this->decoding_ = true; - break; - case 
EventType::RUNNING: - this->decoding_ = true; - break; - case EventType::STOPPING: - this->decoding_ = true; - break; - case EventType::STOPPED: - if (stopping_gracefully) { - command_event.command = CommandEventType::STOP_GRACEFULLY; - this->resampler_->send_command(&command_event); - } - this->decoder_->stop(); - this->decoding_ = false; - break; - case EventType::WARNING: - this->decoding_ = false; - xQueueSend(this->event_queue_, &event, portMAX_DELAY); - break; - } - } - - while (this->resampler_->read_event(&event)) { - switch (event.type) { - case EventType::STARTING: - this->resampling_ = true; - break; - case EventType::STARTED: - this->resampling_ = true; - break; - case EventType::IDLE: - this->resampling_ = true; - break; - case EventType::RUNNING: - this->resampling_ = true; - break; - case EventType::STOPPING: - this->resampling_ = true; - break; - case EventType::STOPPED: - if (!stopping_gracefully) { - if (this->pipeline_type_ == PipelineType::ANNOUNCEMENT) { - command_event.command = CommandEventType::CLEAR_ANNOUNCEMENT; - this->mixer_->send_command(&command_event); - } else if (this->pipeline_type_ == PipelineType::MEDIA) { - command_event.command = CommandEventType::CLEAR_MEDIA; - this->mixer_->send_command(&command_event); - } - } - this->resampler_->stop(); - this->resampling_ = false; - break; - case EventType::WARNING: - this->resampling_ = false; - xQueueSend(this->event_queue_, &event, portMAX_DELAY); - break; - } - } - if (this->reading_ || this->decoding_ || this->resampling_) { - event.type = EventType::RUNNING; - xQueueSend(this->event_queue_, &event, portMAX_DELAY); - } else { - event.type = EventType::IDLE; - xQueueSend(this->event_queue_, &event, portMAX_DELAY); - } -} - -} // namespace nabu -} // namespace esphome -#endif \ No newline at end of file diff --git a/esphome/components/nabu/pipeline.h b/esphome/components/nabu/pipeline.h deleted file mode 100644 index 095a06c..0000000 --- a/esphome/components/nabu/pipeline.h +++ 
/dev/null @@ -1,64 +0,0 @@ -#pragma once - -#ifdef USE_ESP_IDF - -#include "streamer.h" -#include "combine_streamer.h" -#include "decode_streamer.h" -#include "resample_streamer.h" - -#include "esphome/components/media_player/media_player.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" -#include "esphome/core/ring_buffer.h" - -#include -#include - -namespace esphome { -namespace nabu { - -class Pipeline { - public: - Pipeline(CombineStreamer *mixer, PipelineType pipeline_type); - - size_t available() { return this->decoder_->available(); } - - size_t read(uint8_t *buffer, size_t length); - - void start(const std::string &uri, const std::string &task_name, UBaseType_t priority = 1); - void start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority = 1); - - void stop(); - - BaseType_t send_command(CommandEvent *command, TickType_t ticks_to_wait = portMAX_DELAY); - - BaseType_t read_event(TaskEvent *event, TickType_t ticks_to_wait = 0); - - protected: - static void transfer_task_(void *params); - void watch_(bool stopping_gracefully); - - std::unique_ptr reader_; - std::unique_ptr decoder_; - std::unique_ptr resampler_; - CombineStreamer *mixer_; - - TaskHandle_t task_handle_{nullptr}; - - QueueHandle_t event_queue_; - QueueHandle_t command_queue_; - - std::string current_uri_{}; - PipelineType pipeline_type_; - - bool reading_{false}; - bool decoding_{false}; - bool resampling_{false}; -}; - -} // namespace nabu -} // namespace esphome - -#endif \ No newline at end of file diff --git a/esphome/components/nabu/resample_streamer.cpp b/esphome/components/nabu/resample_streamer.cpp deleted file mode 100644 index 123400c..0000000 --- a/esphome/components/nabu/resample_streamer.cpp +++ /dev/null @@ -1,354 +0,0 @@ -#ifdef USE_ESP_IDF - -#include "resample_streamer.h" - -#include "biquad.h" -#include "resampler.h" -#include "streamer.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" - -namespace 
esphome { -namespace nabu { - -static const size_t BUFFER_SIZE = 32768; // Audio samples -static const size_t QUEUE_COUNT = 20; - -static const size_t NUM_TAPS = 32; -static const size_t NUM_FILTERS = 32; -static const bool USE_PRE_POST_FILTER = true; - -ResampleStreamer::ResampleStreamer() { - this->input_ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t)); - this->output_ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t)); - - this->event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent)); - this->command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent)); - - // TODO: Handle if this fails to allocate - if ((this->input_ring_buffer_) || (this->output_ring_buffer_ == nullptr)) { - return; - } -} - -void ResampleStreamer::start(const std::string &task_name, UBaseType_t priority) { - if (this->task_handle_ == nullptr) { - xTaskCreate(ResampleStreamer::resample_task_, task_name.c_str(), 3072, (void *) this, priority, - &this->task_handle_); - } -} - -size_t ResampleStreamer::write(uint8_t *buffer, size_t length) { - size_t free_bytes = this->input_ring_buffer_->free(); - size_t bytes_to_write = std::min(length, free_bytes); - if (bytes_to_write > 0) { - return this->input_ring_buffer_->write((void *) buffer, bytes_to_write); - } - return 0; -} - -void ResampleStreamer::resample_task_(void *params) { - ResampleStreamer *this_streamer = (ResampleStreamer *) params; - - TaskEvent event; - CommandEvent command_event; - - ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); - int16_t *input_buffer = allocator.allocate(BUFFER_SIZE); - int16_t *output_buffer = allocator.allocate(BUFFER_SIZE); - - ExternalRAMAllocator float_allocator(ExternalRAMAllocator::ALLOW_FAILURE); - float *float_input_buffer = float_allocator.allocate(BUFFER_SIZE); - float *float_output_buffer = float_allocator.allocate(BUFFER_SIZE); - - int16_t *input_buffer_current = input_buffer; - int16_t *output_buffer_current = output_buffer; - - size_t 
input_buffer_length = 0; - size_t output_buffer_length = 0; - - if ((input_buffer == nullptr) || (output_buffer == nullptr)) { - event.type = EventType::WARNING; - event.err = ESP_ERR_NO_MEM; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - event.type = EventType::STOPPED; - event.err = ESP_OK; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } - - return; - } - - event.type = EventType::STARTED; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - StreamInfo stream_info; - stream_info.channels = 0; // will be set once we receive the start command - - bool resample = false; - Resample *resampler = nullptr; - float sample_ratio = 1.0; - float lowpass_ratio = 1.0; - int flags = 0; - Biquad lowpass[2][2]; - BiquadCoefficients lowpass_coeff; - - bool pre_filter = false; - bool post_filter = false; - - bool stopping = false; - - uint8_t channel_factor = 1; - - while (true) { - if (xQueueReceive(this_streamer->command_queue_, &command_event, (0 / portTICK_PERIOD_MS)) == pdTRUE) { - if (command_event.command == CommandEventType::START) { - stream_info = command_event.stream_info; - - if (stream_info.channels > 0) { - constexpr uint8_t output_channels = 2; // fix to stereo output for now - channel_factor = output_channels / stream_info.channels; - } - float resample_rate = 16000.0f; - if (stream_info.sample_rate != 16000) { - resample = true; - sample_ratio = resample_rate / stream_info.sample_rate; - if (sample_ratio < 1.0) { - lowpass_ratio -= (10.24 / 16); - - if (lowpass_ratio < 0.84) { - lowpass_ratio = 0.84; - } - - if (lowpass_ratio < sample_ratio) { - // avoid discontinuities near unity sample ratios - lowpass_ratio = sample_ratio; - } - } - if (lowpass_ratio * sample_ratio < 0.98 && USE_PRE_POST_FILTER) { - double cutoff = lowpass_ratio * sample_ratio / 2.0; - biquad_lowpass(&lowpass_coeff, cutoff); - pre_filter = true; - } - - if (lowpass_ratio / sample_ratio < 0.98 && 
USE_PRE_POST_FILTER && !pre_filter) { - double cutoff = lowpass_ratio / sample_ratio / 2.0; - biquad_lowpass(&lowpass_coeff, cutoff); - post_filter = true; - } - - if (pre_filter || post_filter) { - for (int i = 0; i < stream_info.channels; ++i) { - biquad_init(&lowpass[i][0], &lowpass_coeff, 1.0); - biquad_init(&lowpass[i][1], &lowpass_coeff, 1.0); - } - } - - if (sample_ratio < 1.0) { - resampler = resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, sample_ratio * lowpass_ratio, - flags | INCLUDE_LOWPASS); - } else if (lowpass_ratio < 1.0) { - resampler = - resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, lowpass_ratio, flags | INCLUDE_LOWPASS); - } else { - resampler = resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, 1.0, flags); - } - - resampleAdvancePosition(resampler, NUM_TAPS / 2.0); - } else { - resample = false; - } - - input_buffer_current = input_buffer; - output_buffer_current = output_buffer; - input_buffer_length = 0; // measured in bytes - output_buffer_length = 0; // measured in bytes - - stopping = false; - } else if (command_event.command == CommandEventType::STOP) { - break; - } else if (command_event.command == CommandEventType::STOP_GRACEFULLY) { - stopping = true; - } - } - - if (output_buffer_length > 0) { - // If we have data in the internal output buffer, only work on writing it to the ring buffer - - size_t bytes_to_write = std::min(output_buffer_length, this_streamer->output_ring_buffer_->free()); - size_t bytes_written = 0; - if (bytes_to_write > 0) { - bytes_written = this_streamer->output_ring_buffer_->write((void *) output_buffer_current, bytes_to_write); - - output_buffer_current += bytes_written / sizeof(int16_t); - output_buffer_length -= bytes_written; - } - } else { - ////// - // Refill input buffer - ////// - - // Move old data to the start of the buffer - if (input_buffer_length > 0) { - memmove((void *) input_buffer, (void *) input_buffer_current, input_buffer_length); - } - input_buffer_current = 
input_buffer; - - // Copy new data to the end of the of the buffer - size_t bytes_available = this_streamer->input_ring_buffer_->available(); - size_t bytes_to_read = std::min(bytes_available, BUFFER_SIZE * sizeof(int16_t) - input_buffer_length); - - // size_t bytes_to_read = 0; - // if (resample) { - // size_t output_bytes_free = this_streamer->output_ring_buffer_->free(); - // size_t input_bytes_available = bytes_available; - - // size_t required_samples_to_fill_free = resampleGetRequiredSamples(resampler, output_bytes_free, - // sample_ratio); - - // if (required_samples_to_fill_free > bytes_available + input_buffer_length) { - // // we can't fill in the output buffer fully with what is available, so just give as many samples as we can - // bytes_to_read = std::min(bytes_available, BUFFER_SIZE * sizeof(int16_t) - input_buffer_length); - // } else { - // bytes_to_read = required_samples_to_fill_free - input_buffer_length; - // } - // } else { - // bytes_to_read = std::min(bytes_available, BUFFER_SIZE * sizeof(int16_t) - input_buffer_length); - // } - - if (bytes_to_read > 0) { - int16_t *new_input_buffer_data = input_buffer + input_buffer_length / sizeof(int16_t); - size_t bytes_read = this_streamer->input_ring_buffer_->read((void *) new_input_buffer_data, bytes_to_read, - (10 / portTICK_PERIOD_MS)); - - input_buffer_length += bytes_read; - } - - ///// - // Resample here - ///// - - if (resample) { - if (input_buffer_length > 0) { - // Samples are indiviudal int16 values. Frames include 1 sample for mono and 2 samples for stereo - // Be careful converting between bytes, samples, and frames! - // 1 sample = 2 bytes = sizeof(int16_t) - // if mono: - // 1 frame = 1 sample - // if stereo: - // 1 frame = 2 samples (left and right) - - size_t samples_read = input_buffer_length / sizeof(int16_t); - - // This is inefficient! 
It reconverts any samples that weren't used in the previous resampling run - for (int i = 0; i < samples_read; ++i) { - float_input_buffer[i] = static_cast(input_buffer[i]) / 32768.0f; - } - - size_t frames_read = samples_read / stream_info.channels; - - // The low pass filter seems to be causing glitches... probably because samples are repeated due to the above - // ineffeciency! - if (pre_filter) { - for (int i = 0; i < stream_info.channels; ++i) { - biquad_apply_buffer(&lowpass[i][0], float_input_buffer + i, frames_read, stream_info.channels); - biquad_apply_buffer(&lowpass[i][1], float_input_buffer + i, frames_read, stream_info.channels); - } - } - - ResampleResult res; - - res = resampleProcessInterleaved(resampler, float_input_buffer, frames_read, float_output_buffer, - BUFFER_SIZE / channel_factor, sample_ratio); - - size_t frames_used = res.input_used; - size_t samples_used = frames_used * stream_info.channels; - - size_t frames_generated = res.output_generated; - if (post_filter) { - for (int i = 0; i < stream_info.channels; ++i) { - biquad_apply_buffer(&lowpass[i][0], float_output_buffer + i, frames_generated, stream_info.channels); - biquad_apply_buffer(&lowpass[i][1], float_output_buffer + i, frames_generated, stream_info.channels); - } - } - - size_t samples_generated = frames_generated * stream_info.channels; - - for (int i = 0; i < samples_generated; ++i) { - output_buffer[i] = static_cast(float_output_buffer[i] * 32767); - } - - input_buffer_current += samples_used; - input_buffer_length -= samples_used * sizeof(int16_t); - - output_buffer_current = output_buffer; - output_buffer_length += samples_generated * sizeof(int16_t); - } - - } else { - size_t bytes_to_transfer = std::min(BUFFER_SIZE * sizeof(int16_t) / channel_factor, input_buffer_length); - std::memcpy((void *) output_buffer, (void *) input_buffer_current, bytes_to_transfer); - - input_buffer_current += bytes_to_transfer / sizeof(int16_t); - input_buffer_length -= bytes_to_transfer; - - 
output_buffer_current = output_buffer; - output_buffer_length += bytes_to_transfer; - } - - if (stream_info.channels == 1) { - // Convert mono to stereo - for (int i = output_buffer_length / (sizeof(int16_t)) - 1; i >= 0; --i) { - output_buffer[2 * i] = output_buffer[i]; - output_buffer[2 * i + 1] = output_buffer[i]; - } - - output_buffer_length *= 2; // double the bytes for stereo samples - } - } - - if (this_streamer->input_ring_buffer_->available() || this_streamer->output_ring_buffer_->available() || - (output_buffer_length > 0) || (input_buffer_length > 0)) { - event.type = EventType::RUNNING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else if (stopping) { - break; - } else { - event.type = EventType::IDLE; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } - } - event.type = EventType::STOPPING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - this_streamer->reset_ring_buffers(); - allocator.deallocate(input_buffer, BUFFER_SIZE); - allocator.deallocate(output_buffer, BUFFER_SIZE); - float_allocator.deallocate(float_input_buffer, BUFFER_SIZE); - float_allocator.deallocate(float_output_buffer, BUFFER_SIZE); - - if (resampler != nullptr) { - resampleFree(resampler); - } - - event.type = EventType::STOPPED; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } -} - -void ResampleStreamer::reset_ring_buffers() { - this->input_ring_buffer_->reset(); - this->output_ring_buffer_->reset(); -} - -} // namespace nabu -} // namespace esphome -#endif \ No newline at end of file diff --git a/esphome/components/nabu/resample_streamer.h b/esphome/components/nabu/resample_streamer.h deleted file mode 100644 index 403deb7..0000000 --- a/esphome/components/nabu/resample_streamer.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#ifdef USE_ESP_IDF - -#include "streamer.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" -#include 
"esphome/core/ring_buffer.h" - -#include -#include - -namespace esphome { -namespace nabu { -class ResampleStreamer : public OutputStreamer { - public: - ResampleStreamer(); - void start(const std::string &task_name, UBaseType_t priority = 1) override; - void reset_ring_buffers() override; - - size_t input_free() { return this->input_ring_buffer_->free(); } - - bool empty() { return (this->input_ring_buffer_->available() + this->output_ring_buffer_->available()) == 0; } - - size_t write(uint8_t *buffer, size_t length); - - protected: - static void resample_task_(void *params); - std::unique_ptr input_ring_buffer_; -}; - -} // namespace nabu -} // namespace esphome - -#endif \ No newline at end of file diff --git a/esphome/components/nabu/streamer.cpp b/esphome/components/nabu/streamer.cpp deleted file mode 100644 index 8d21276..0000000 --- a/esphome/components/nabu/streamer.cpp +++ /dev/null @@ -1,286 +0,0 @@ -#ifdef USE_ESP_IDF - -#include "streamer.h" - -#include "esphome/core/hal.h" -#include "esphome/core/helpers.h" - -#include - -namespace esphome { -namespace nabu { - -// Major TODOs: -// - Rename/split up file, it contains more than one class - -static const size_t HTTP_BUFFER_SIZE = 16 * 8192; -static const size_t QUEUE_COUNT = 20; - -void OutputStreamer::stop() { - vTaskDelete(this->task_handle_); - this->task_handle_ = nullptr; - - xQueueReset(this->event_queue_); - xQueueReset(this->command_queue_); -} - -HTTPStreamer::HTTPStreamer() { - this->output_ring_buffer_ = RingBuffer::create(HTTP_BUFFER_SIZE); - // TODO: Handle if this fails to allocate - if (this->output_ring_buffer_ == nullptr) { - return; - } - - this->event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent)); - this->command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent)); -} - -media_player::MediaFileType HTTPStreamer::establish_connection_(esp_http_client_handle_t *client) { - this->cleanup_connection_(client); - - if (this->current_uri_.empty()) { - return 
media_player::MediaFileType::NONE; - } - - esp_http_client_config_t config = { - .url = this->current_uri_.c_str(), - .cert_pem = nullptr, - .disable_auto_redirect = false, - .max_redirection_count = 10, - }; - *client = esp_http_client_init(&config); - - if (client == nullptr) { - printf("Failed to initialize HTTP connection"); - return media_player::MediaFileType::NONE; - } - - esp_err_t err; - if ((err = esp_http_client_open(*client, 0)) != ESP_OK) { - printf("Failed to open HTTP connection"); - this->cleanup_connection_(client); - return media_player::MediaFileType::NONE; - } - - int content_length = esp_http_client_fetch_headers(*client); - - // TODO: Figure out how to handle this better! Music Assistant streams don't send a content length - // if (content_length <= 0) { - // printf("Fialed to get content length"); - // this->cleanup_connection_(client); - // return media_player::MediaFileType::NONE; - // } - - char url[500]; - if (esp_http_client_get_url(*client, url, 500) != ESP_OK) { - this->cleanup_connection_(client); - return media_player::MediaFileType::NONE; - } - - std::string url_string = url; - - if (str_endswith(url_string, ".wav")) { - return media_player::MediaFileType::WAV; - } else if (str_endswith(url_string, ".mp3")) { - return media_player::MediaFileType::MP3; - } else if (str_endswith(url_string, ".flac")) { - return media_player::MediaFileType::FLAC; - } - - return media_player::MediaFileType::NONE; -} - -void HTTPStreamer::start_http(const std::string &task_name, UBaseType_t priority) { - if (this->task_handle_ == nullptr) { - xTaskCreate(HTTPStreamer::read_task_, task_name.c_str(), 3072, (void *) this, priority, &this->task_handle_); - } -} - -void HTTPStreamer::start_file(const std::string &task_name, UBaseType_t priority) { - if (this->task_handle_ == nullptr) { - xTaskCreate(HTTPStreamer::file_read_task_, task_name.c_str(), 3072, (void *) this, priority, &this->task_handle_); - } -} - -void HTTPStreamer::start(const std::string 
&task_name, UBaseType_t priority) { - if (this->task_handle_ == nullptr) { - xTaskCreate(HTTPStreamer::read_task_, task_name.c_str(), 3072, (void *) this, priority, &this->task_handle_); - } -} - -void HTTPStreamer::start(const std::string &uri, const std::string &task_name, UBaseType_t priority) { - this->current_uri_ = uri; - this->start_http(task_name, priority); - CommandEvent command_event; - command_event.command = CommandEventType::START; - this->send_command(&command_event); -} - -void HTTPStreamer::start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority) { - this->current_media_file_ = media_file; - this->start_file(task_name, priority); - CommandEvent command_event; - command_event.command = CommandEventType::START; - command_event.media_file_type = media_file->file_type; - this->send_command(&command_event); -} - -void HTTPStreamer::cleanup_connection_(esp_http_client_handle_t *client) { - if (*client != nullptr) { - esp_http_client_close(*client); - esp_http_client_cleanup(*client); - *client = nullptr; - } -} - -void HTTPStreamer::read_task_(void *params) { - HTTPStreamer *this_streamer = (HTTPStreamer *) params; - - TaskEvent event; - CommandEvent command_event; - - esp_http_client_handle_t client = nullptr; - - ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); - uint8_t *buffer = allocator.allocate(HTTP_BUFFER_SIZE); - - if (buffer == nullptr) { - event.type = EventType::WARNING; - event.err = ESP_ERR_NO_MEM; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - event.type = EventType::STOPPED; - event.err = ESP_OK; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } - - return; - } - - media_player::MediaFileType file_type = media_player::MediaFileType::NONE; - - while (true) { - if (xQueueReceive(this_streamer->command_queue_, &command_event, (10 / portTICK_PERIOD_MS)) == pdTRUE) { - if (command_event.command == 
CommandEventType::START) { - file_type = this_streamer->establish_connection_(&client); - if (file_type == media_player::MediaFileType::NONE) { - this_streamer->cleanup_connection_(&client); - break; - } else { - event.type = EventType::STARTED; - event.media_file_type = file_type; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } - } else if (command_event.command == CommandEventType::STOP) { - this_streamer->cleanup_connection_(&client); - break; - } else if (command_event.command == CommandEventType::STOP_GRACEFULLY) { - // Waits until output ring buffer is empty before stopping the loop - this_streamer->cleanup_connection_(&client); - } - } - - if (client != nullptr) { - size_t bytes_to_read = this_streamer->output_ring_buffer_->free(); - int received_len = 0; - if (bytes_to_read > 0) { - received_len = esp_http_client_read(client, (char *) buffer, bytes_to_read); - } - - if (received_len > 0) { - size_t bytes_written = this_streamer->output_ring_buffer_->write((void *) buffer, received_len); - } else if (received_len < 0) { - // Error situation - } - - if (esp_http_client_is_complete_data_received(client)) { - this_streamer->cleanup_connection_(&client); - } - - event.type = EventType::RUNNING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else if (this_streamer->output_ring_buffer_->available() > 0) { - // the connection is closed but there is still data in the ring buffer - event.type = EventType::IDLE; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else if (file_type != media_player::MediaFileType::NONE) { - // there is no active connection, the ring buffer is empty, and a file was actually read, so move to end task - break; - } - } - event.type = EventType::STOPPING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - this_streamer->reset_ring_buffers(); - allocator.deallocate(buffer, HTTP_BUFFER_SIZE); - - event.type = EventType::STOPPED; - 
xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } -} -void HTTPStreamer::file_read_task_(void *params) { - HTTPStreamer *this_streamer = (HTTPStreamer *) params; - - TaskEvent event; - CommandEvent command_event; - - media_player::MediaFileType file_type = media_player::MediaFileType::NONE; - size_t bytes_left = 0; - const uint8_t *file_current = nullptr; - - while (true) { - if (xQueueReceive(this_streamer->command_queue_, &command_event, (10 / portTICK_PERIOD_MS)) == pdTRUE) { - if (command_event.command == CommandEventType::START) { - file_type = this_streamer->current_media_file_->file_type; - bytes_left = this_streamer->current_media_file_->length; - file_current = this_streamer->current_media_file_->data; - - event.type = EventType::STARTED; - event.media_file_type = file_type; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else if (command_event.command == CommandEventType::STOP) { - break; - } - } - - if (file_type != media_player::MediaFileType::NONE) { - if (bytes_left > 0) { - size_t bytes_to_read = std::min(bytes_left, this_streamer->output_ring_buffer_->free()); - if (bytes_to_read > 0) { - size_t bytes_written = this_streamer->output_ring_buffer_->write((void *) file_current, bytes_to_read); - bytes_left -= bytes_written; - file_current += bytes_written; - } - event.type = EventType::RUNNING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else if (this_streamer->output_ring_buffer_->available() > 0) { - event.type = EventType::IDLE; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - } else { - break; - } - } - } - event.type = EventType::STOPPING; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - this_streamer->reset_ring_buffers(); - - event.type = EventType::STOPPED; - xQueueSend(this_streamer->event_queue_, &event, portMAX_DELAY); - - while (true) { - delay(10); - } -} - -} // namespace nabu -} // namespace esphome - 
-#endif \ No newline at end of file diff --git a/esphome/components/nabu/wav_decoder.cpp b/esphome/components/nabu/wav_decoder.cpp index 32effbd..dc421f6 100644 --- a/esphome/components/nabu/wav_decoder.cpp +++ b/esphome/components/nabu/wav_decoder.cpp @@ -7,12 +7,12 @@ WAVDecoderResult WAVDecoder::next() { switch (this->state_) { case WAV_DECODER_BEFORE_RIFF: { - this->chunk_name_ = std::string((const char *)this->buffer_, 4); + this->chunk_name_ = std::string((const char *)*this->buffer_, 4); if (this->chunk_name_ != "RIFF") { return WAV_DECODER_ERROR_NO_RIFF; } - this->chunk_bytes_left_ = *((uint32_t *)(this->buffer_ + 4)); + this->chunk_bytes_left_ = *((uint32_t *)(*this->buffer_ + 4)); if ((this->chunk_bytes_left_ % 2) != 0) { // Pad byte this->chunk_bytes_left_++; @@ -25,7 +25,7 @@ WAVDecoderResult WAVDecoder::next() { } case WAV_DECODER_BEFORE_WAVE: { - this->chunk_name_ = std::string((const char *)this->buffer_, 4); + this->chunk_name_ = std::string((const char *)*this->buffer_, 4); if (this->chunk_name_ != "WAVE") { return WAV_DECODER_ERROR_NO_WAVE; } @@ -37,8 +37,8 @@ WAVDecoderResult WAVDecoder::next() { } case WAV_DECODER_BEFORE_FMT: { - this->chunk_name_ = std::string((const char *)this->buffer_, 4); - this->chunk_bytes_left_ = *((uint32_t *)(this->buffer_ + 4)); + this->chunk_name_ = std::string((const char *)*this->buffer_, 4); + this->chunk_bytes_left_ = *((uint32_t *)(*this->buffer_ + 4)); if ((this->chunk_bytes_left_ % 2) != 0) { // Pad byte this->chunk_bytes_left_++; @@ -74,9 +74,9 @@ WAVDecoderResult WAVDecoder::next() { * bits per sample (uint16_t) * [rest of format chunk] */ - this->num_channels_ = *((uint16_t *)(this->buffer_ + 2)); - this->sample_rate_ = *((uint32_t *)(this->buffer_ + 4)); - this->bits_per_sample_ = *((uint16_t *)(this->buffer_ + 14)); + this->num_channels_ = *((uint16_t *)(*this->buffer_ + 2)); + this->sample_rate_ = *((uint32_t *)(*this->buffer_ + 4)); + this->bits_per_sample_ = *((uint16_t *)(*this->buffer_ + 14)); // 
Next chunk this->state_ = WAV_DECODER_BEFORE_DATA; @@ -85,8 +85,8 @@ WAVDecoderResult WAVDecoder::next() { } case WAV_DECODER_BEFORE_DATA: { - this->chunk_name_ = std::string((const char *)this->buffer_, 4); - this->chunk_bytes_left_ = *((uint32_t *)(this->buffer_ + 4)); + this->chunk_name_ = std::string((const char *)*this->buffer_, 4); + this->chunk_bytes_left_ = *((uint32_t *)(*this->buffer_ + 4)); if ((this->chunk_bytes_left_ % 2) != 0) { // Pad byte this->chunk_bytes_left_++; diff --git a/esphome/components/nabu/wav_decoder.h b/esphome/components/nabu/wav_decoder.h index 13bcba1..4d5635c 100644 --- a/esphome/components/nabu/wav_decoder.h +++ b/esphome/components/nabu/wav_decoder.h @@ -54,7 +54,7 @@ enum WAVDecoderResult { class WAVDecoder { public: - WAVDecoder(uint8_t *buffer) : buffer_(buffer){}; + WAVDecoder(uint8_t **buffer) : buffer_(buffer){}; ~WAVDecoder(){}; WAVDecoderState state() { return this->state_; } @@ -86,7 +86,7 @@ class WAVDecoder { } protected: - uint8_t *buffer_; + uint8_t **buffer_; WAVDecoderState state_ = WAV_DECODER_BEFORE_RIFF; std::size_t bytes_needed_ = 8; // chunk name + size std::size_t bytes_to_skip_ = 0; diff --git a/voice-kit.yaml b/voice-kit.yaml index 2de05c3..2b8055e 100644 --- a/voice-kit.yaml +++ b/voice-kit.yaml @@ -58,6 +58,7 @@ esp32: CONFIG_ESP32S3_DATA_CACHE_64KB: "y" CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y" CONFIG_ESP32_S3_BOX_BOARD: "y" + CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY: "y" # CONFIG_FREERTOS_USE_TRACE_FACILITY: "y" # CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS: "y" From 1a3fe1a984c6f19b888e708977dfcf0d852370c0 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Thu, 1 Aug 2024 13:36:06 -0400 Subject: [PATCH 2/2] disable task debug logging --- esphome/components/nabu/nabu_media_player.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp index 8ec1dfb..f817d62 100644 --- 
a/esphome/components/nabu/nabu_media_player.cpp +++ b/esphome/components/nabu/nabu_media_player.cpp @@ -148,7 +148,7 @@ static void stats_task(void *arg) { static const char *const TAG = "nabu_media_player"; void NabuMediaPlayer::setup() { - xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); + // xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, STATS_TASK_PRIO, NULL, tskNO_AFFINITY); state = media_player::MEDIA_PLAYER_STATE_IDLE;