diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index df7f328..12b244a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,12 +1,14 @@ -set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp") +set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp" "media.cpp") if(IDF_TARGET STREQUAL linux) idf_component_register( - SRCS ${COMMON_SRC} + SRCS ${COMMON_SRC} "platform_linux.cpp" REQUIRES peer esp-libopus esp_http_client) + target_link_libraries(${COMPONENT_LIB} PRIVATE pulse pulse-simple) + target_link_libraries(${COMPONENT_LIB} PRIVATE "-lbsd") else() idf_component_register( - SRCS ${COMMON_SRC} "wifi.cpp" "media.cpp" + SRCS ${COMMON_SRC} "platform_esp32s3.cpp" "wifi.cpp" REQUIRES driver esp_wifi nvs_flash peer esp_psram esp-libopus esp_http_client) endif() diff --git a/src/http.cpp b/src/http.cpp index a960953..c06c3d0 100644 --- a/src/http.cpp +++ b/src/http.cpp @@ -3,6 +3,7 @@ #include #include "main.h" +#include "platform.h" #ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) @@ -34,9 +35,7 @@ esp_err_t oai_http_event_handler(esp_http_client_event_t *evt) { ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_DATA, len=%d", evt->data_len); if (esp_http_client_is_chunked_response(evt->client)) { ESP_LOGE(LOG_TAG, "Chunked HTTP response not supported"); -#ifndef LINUX_BUILD - esp_restart(); -#endif + oai_platform_restart(); } if (output_len == 0 && evt->user_data) { @@ -88,9 +87,7 @@ void oai_http_request(char *offer, char *answer) { esp_err_t err = esp_http_client_perform(client); if (err != ESP_OK || esp_http_client_get_status_code(client) != 201) { ESP_LOGE(LOG_TAG, "Error perform http request %s", esp_err_to_name(err)); -#ifndef LINUX_BUILD - esp_restart(); -#endif + oai_platform_restart(); } esp_http_client_cleanup(client); diff --git a/src/main.cpp b/src/main.cpp index bd84e2d..e6ea1a4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,29 +4,19 @@ #include #include -#ifndef LINUX_BUILD -#include "nvs_flash.h" +#include "platform.h" -extern "C" void app_main(void) { - esp_err_t ret = nvs_flash_init(); - if (ret == ESP_ERR_NVS_NO_FREE_PAGES || - ret == ESP_ERR_NVS_NEW_VERSION_FOUND) { - ESP_ERROR_CHECK(nvs_flash_erase()); - ret = nvs_flash_init(); - } - ESP_ERROR_CHECK(ret); +#ifndef LINUX_BUILD +#define MAIN extern "C" void app_main(void) +#else +#define MAIN int main(void) +#endif +MAIN { ESP_ERROR_CHECK(esp_event_loop_create_default()); peer_init(); - oai_init_audio_capture(); + oai_platform_init_audio_capture(); oai_init_audio_decoder(); oai_wifi(); oai_webrtc(); } -#else -int main(void) { - ESP_ERROR_CHECK(esp_event_loop_create_default()); - peer_init(); - oai_webrtc(); -} -#endif diff --git a/src/main.h b/src/main.h index 198ef5c..9ef6cff 100644 --- a/src/main.h +++ b/src/main.h @@ -1,3 +1,6 @@ +#ifndef _MAIN_H_ +#define _MAIN_H_ + #include #define LOG_TAG "realtimeapi-sdk" @@ -11,3 +14,5 @@ void oai_send_audio(PeerConnection *peer_connection); void oai_audio_decode(uint8_t *data, size_t size); void oai_webrtc(); void oai_http_request(char *offer, char *answer); + +#endif \ No newline at end of file diff --git a/src/media.cpp b/src/media.cpp index 37fbfa4..6e8b923 100644 --- a/src/media.cpp +++ b/src/media.cpp @@ -1,144 +1,69 @@ -#include #include +#include #include "main.h" +#include "platform.h" #define OPUS_OUT_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode -#define SAMPLE_RATE 8000 -#define BUFFER_SAMPLES 320 - -#define MCLK_PIN 0 -#define DAC_BCLK_PIN 15 -#define DAC_LRCLK_PIN 16 -#define DAC_DATA_PIN 17 -#define ADC_BCLK_PIN 38 -#define ADC_LRCLK_PIN 39 -#define ADC_DATA_PIN 40 +#define SAMPLE_RATE 48000 +#define FRAME_SIZE SAMPLE_RATE * 20 / 1000 +#define CAPTURE_CHANNELS 2 +#define PLAYBACK_CHANNELS 2 #define OPUS_ENCODER_BITRATE 30000 #define OPUS_ENCODER_COMPLEXITY 0 -void oai_init_audio_capture() { - i2s_config_t i2s_config_out = { - .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX), - .sample_rate = SAMPLE_RATE, - .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, - .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT, - .communication_format = I2S_COMM_FORMAT_I2S_MSB, - .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, - .dma_buf_count = 8, - .dma_buf_len = BUFFER_SAMPLES, - .use_apll = 1, - .tx_desc_auto_clear = true, - }; - if (i2s_driver_install(I2S_NUM_0, &i2s_config_out, 0, NULL) != ESP_OK) { - printf("Failed to configure I2S driver for audio output"); - return; - } - - i2s_pin_config_t pin_config_out = { - .mck_io_num = MCLK_PIN, - .bck_io_num = DAC_BCLK_PIN, - .ws_io_num = DAC_LRCLK_PIN, - .data_out_num = DAC_DATA_PIN, - .data_in_num = I2S_PIN_NO_CHANGE, - }; - if (i2s_set_pin(I2S_NUM_0, &pin_config_out) != ESP_OK) { - printf("Failed to set I2S pins for audio output"); - return; - } - i2s_zero_dma_buffer(I2S_NUM_0); - - i2s_config_t i2s_config_in = { - .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), - .sample_rate = SAMPLE_RATE, - .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, - .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, - .communication_format = I2S_COMM_FORMAT_I2S_MSB, - .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, - .dma_buf_count = 8, - .dma_buf_len = BUFFER_SAMPLES, - .use_apll = 1, - }; - if (i2s_driver_install(I2S_NUM_1, &i2s_config_in, 0, NULL) != ESP_OK) { - printf("Failed to configure I2S driver for audio input"); - return; - } - - i2s_pin_config_t pin_config_in = { - .mck_io_num = MCLK_PIN, - .bck_io_num = ADC_BCLK_PIN, - .ws_io_num = ADC_LRCLK_PIN, - .data_out_num = I2S_PIN_NO_CHANGE, - .data_in_num = ADC_DATA_PIN, - }; - if (i2s_set_pin(I2S_NUM_1, &pin_config_in) != ESP_OK) { - printf("Failed to set I2S pins for audio input"); - return; - } -} - -static opus_int16 *output_buffer = NULL; -static size_t output_buffer_size = BUFFER_SAMPLES * sizeof(opus_int16); static OpusDecoder *opus_decoder = NULL; +static OpusEncoder *opus_encoder = NULL; + +static opus_int16 output_buffer[FRAME_SIZE * PLAYBACK_CHANNELS]; +static opus_int16 input_buffer[FRAME_SIZE * CAPTURE_CHANNELS]; +static uint8_t encoder_output_buffer[OPUS_OUT_BUFFER_SIZE]; void oai_init_audio_decoder() { int decoder_error = 0; - opus_decoder = opus_decoder_create(SAMPLE_RATE, 2, &decoder_error); + opus_decoder = + opus_decoder_create(SAMPLE_RATE, PLAYBACK_CHANNELS, &decoder_error); if (decoder_error != OPUS_OK) { printf("Failed to create OPUS decoder"); return; } - - output_buffer = (opus_int16 *)malloc(output_buffer_size); } void oai_audio_decode(uint8_t *data, size_t size) { - int decoded_size = - opus_decode(opus_decoder, data, size, output_buffer, BUFFER_SAMPLES, 0); + int decoded_size = opus_decode(opus_decoder, data, size, output_buffer, + sizeof(output_buffer), 0); if (decoded_size > 0) { size_t bytes_written = 0; - i2s_write(I2S_NUM_0, output_buffer, output_buffer_size, - &bytes_written, portMAX_DELAY); + oai_platform_audio_write((char *)output_buffer, sizeof(output_buffer), + &bytes_written); } } -static OpusEncoder *opus_encoder = NULL; -static opus_int16 *encoder_input_buffer = NULL; -static uint8_t *encoder_output_buffer = NULL; - void oai_init_audio_encoder() { int encoder_error; - opus_encoder = opus_encoder_create(SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP, - &encoder_error); + opus_encoder = opus_encoder_create(SAMPLE_RATE, CAPTURE_CHANNELS, + OPUS_APPLICATION_VOIP, &encoder_error); if (encoder_error != OPUS_OK) { printf("Failed to create OPUS encoder"); return; } - if (opus_encoder_init(opus_encoder, SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP) != - OPUS_OK) { - printf("Failed to initialize OPUS encoder"); - return; - } - opus_encoder_ctl(opus_encoder, OPUS_SET_BITRATE(OPUS_ENCODER_BITRATE)); opus_encoder_ctl(opus_encoder, OPUS_SET_COMPLEXITY(OPUS_ENCODER_COMPLEXITY)); opus_encoder_ctl(opus_encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); - encoder_input_buffer = (opus_int16 *)malloc(BUFFER_SAMPLES); - encoder_output_buffer = (uint8_t *)malloc(OPUS_OUT_BUFFER_SIZE); } void oai_send_audio(PeerConnection *peer_connection) { size_t bytes_read = 0; - i2s_read(I2S_NUM_1, encoder_input_buffer, BUFFER_SAMPLES, &bytes_read, - portMAX_DELAY); + oai_platform_audio_read((char *)input_buffer, sizeof(input_buffer), + &bytes_read); auto encoded_size = - opus_encode(opus_encoder, encoder_input_buffer, BUFFER_SAMPLES / 2, - encoder_output_buffer, OPUS_OUT_BUFFER_SIZE); + opus_encode(opus_encoder, input_buffer, FRAME_SIZE, encoder_output_buffer, + sizeof(encoder_output_buffer)); peer_connection_send_audio(peer_connection, encoder_output_buffer, encoded_size); diff --git a/src/media.h b/src/media.h new file mode 100644 index 0000000..1a1ad25 --- /dev/null +++ b/src/media.h @@ -0,0 +1,5 @@ +#pragma once + +#include "peer_connection.h" + +void oai_send_audio(PeerConnection *peer_connection); diff --git a/src/platform.h b/src/platform.h new file mode 100644 index 0000000..c5a31a7 --- /dev/null +++ b/src/platform.h @@ -0,0 +1,17 @@ +#ifndef _PLATFORM_H_ +#define _PLATFORM_H_ + +#include + +#include "peer_connection.h" + +void oai_platform_init(void); +void oai_platform_restart(void); +void oai_platform_init_audio_capture(void); +void oai_platform_audio_write(char *output_buffer, size_t output_buffer_size, + size_t *bytes_written); +void oai_platform_audio_read(char *input_buffer, size_t input_buffer_size, + size_t *bytes_read); +void oai_platform_send_audio_task(PeerConnection *peer_connection); + +#endif \ No newline at end of file diff --git a/src/platform_esp32s3.cpp b/src/platform_esp32s3.cpp new file mode 100644 index 0000000..991c2c1 --- /dev/null +++ b/src/platform_esp32s3.cpp @@ -0,0 +1,126 @@ +#include + +#include "esp_system.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "media.h" +#include "nvs_flash.h" +#include "peer_connection.h" +#include "platform.h" + +#define TICK_INTERVAL 15 + +void oai_platform_restart() { + esp_restart(); +} + +#define MCLK_PIN 0 +#define DAC_BCLK_PIN 15 +#define DAC_LRCLK_PIN 16 +#define DAC_DATA_PIN 17 +#define ADC_BCLK_PIN 38 +#define ADC_LRCLK_PIN 39 +#define ADC_DATA_PIN 40 + +#define SAMPLE_RATE 8000 +#define BUFFER_SAMPLES 320 + +void oai_platform_init(void) { + esp_err_t ret = nvs_flash_init(); + if (ret == ESP_ERR_NVS_NO_FREE_PAGES || + ret == ESP_ERR_NVS_NEW_VERSION_FOUND) { + ESP_ERROR_CHECK(nvs_flash_erase()); + ret = nvs_flash_init(); + } + ESP_ERROR_CHECK(ret); +} + +void oai_platform_init_audio_capture() { + i2s_config_t i2s_config_out = { + .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX), + .sample_rate = SAMPLE_RATE, + .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, + .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT, + .communication_format = I2S_COMM_FORMAT_I2S_MSB, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = 8, + .dma_buf_len = BUFFER_SAMPLES, + .use_apll = 1, + .tx_desc_auto_clear = true, + }; + if (i2s_driver_install(I2S_NUM_0, &i2s_config_out, 0, NULL) != ESP_OK) { + printf("Failed to configure I2S driver for audio output"); + return; + } + + i2s_pin_config_t pin_config_out = { + .mck_io_num = MCLK_PIN, + .bck_io_num = DAC_BCLK_PIN, + .ws_io_num = DAC_LRCLK_PIN, + .data_out_num = DAC_DATA_PIN, + .data_in_num = I2S_PIN_NO_CHANGE, + }; + if (i2s_set_pin(I2S_NUM_0, &pin_config_out) != ESP_OK) { + printf("Failed to set I2S pins for audio output"); + return; + } + i2s_zero_dma_buffer(I2S_NUM_0); + + i2s_config_t i2s_config_in = { + .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), + .sample_rate = SAMPLE_RATE, + .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, + .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, + .communication_format = I2S_COMM_FORMAT_I2S_MSB, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = 8, + .dma_buf_len = BUFFER_SAMPLES, + .use_apll = 1, + }; + if (i2s_driver_install(I2S_NUM_1, &i2s_config_in, 0, NULL) != ESP_OK) { + printf("Failed to configure I2S driver for audio input"); + return; + } + + i2s_pin_config_t pin_config_in = { + .mck_io_num = MCLK_PIN, + .bck_io_num = ADC_BCLK_PIN, + .ws_io_num = ADC_LRCLK_PIN, + .data_out_num = I2S_PIN_NO_CHANGE, + .data_in_num = ADC_DATA_PIN, + }; + if (i2s_set_pin(I2S_NUM_1, &pin_config_in) != ESP_OK) { + printf("Failed to set I2S pins for audio input"); + return; + } +} + +void oai_platform_audio_write(char *output_buffer, size_t output_buffer_size, + size_t *bytes_written) { + i2s_write(I2S_NUM_0, output_buffer, output_buffer_size, bytes_written, + portMAX_DELAY); +} + +void oai_platform_audio_read(char *input_buffer, size_t input_buffer_size, + size_t *bytes_read) { + i2s_read(I2S_NUM_1, input_buffer, input_buffer_size, bytes_read, + portMAX_DELAY); +} + +static TaskHandle_t task_handle; +static StaticTask_t task_buffer; + +static void oai_send_audio_task(void *user_data) { + while (1) { + oai_send_audio((PeerConnection *)user_data); + vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); + } +} + +void oai_platform_send_audio_task(PeerConnection *peer_connection) { + StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( + 20000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); + xTaskCreateStaticPinnedToCore(oai_send_audio_task, "audio_publisher", 20000, + peer_connection, 7, stack_memory, &task_buffer, + 0); +} diff --git a/src/platform_linux.cpp b/src/platform_linux.cpp new file mode 100644 index 0000000..cd33236 --- /dev/null +++ b/src/platform_linux.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "media.h" +#include "peer_connection.h" +#include "platform.h" + +#define RATE 48000 +#define CHANNELS 2 + +static pa_simple *capture = NULL; +static pa_simple *playback = NULL; + +void oai_platform_init(void) {} + +void oai_platform_restart() { + abort(); +} + +void oai_wifi(void) {} + +void oai_platform_init_audio_capture() { + int error; + + // Sample format spec + pa_sample_spec ss = { + .format = PA_SAMPLE_S16LE, .rate = RATE, .channels = CHANNELS}; + + // Stream creation attributes + pa_buffer_attr attr = { + .maxlength = (uint32_t)-1, + .tlength = (uint32_t)-1, + .prebuf = (uint32_t)-1, + .minreq = (uint32_t)-1, + .fragsize = (uint32_t)-1, + }; + + // Create capture stream + capture = pa_simple_new(NULL, // Use default server + "Loopback", // Application name + PA_STREAM_RECORD, // Stream direction + NULL, // Use default device + "Capture", // Stream description + &ss, // Sample format + NULL, // Use default channel map + &attr, // Buffer attributes + &error // Error code + ); + + if (!capture) { + fprintf(stderr, "Failed to create capture stream: %s\n", + pa_strerror(error)); + return; + } + + // Create playback stream + playback = pa_simple_new(NULL, // Use default server + "Loopback", // Application name + PA_STREAM_PLAYBACK, // Stream direction + NULL, // Use default device + "Playback", // Stream description + &ss, // Sample format + NULL, // Use default channel map + &attr, // Buffer attributes + &error // Error code + ); + + if (!playback) { + fprintf(stderr, "Failed to create playback stream: %s\n", + pa_strerror(error)); + pa_simple_free(capture); + return; + } + + printf("Starting audio loopback (Ctrl+C to stop)...\n"); +} + +void oai_platform_audio_write(char *output_buffer, size_t output_buffer_size, + size_t *bytes_written) { + int error; + if (pa_simple_write(playback, output_buffer, output_buffer_size, &error) < + 0) { + fprintf(stderr, "Read failed: %s\n", pa_strerror(error)); + return; + } + + *bytes_written = output_buffer_size; +} + +void oai_platform_audio_read(char *input_buffer, size_t input_buffer_size, + size_t *bytes_read) { + int error; + if (pa_simple_read(capture, input_buffer, input_buffer_size, &error) < 0) { + fprintf(stderr, "Read failed: %s\n", pa_strerror(error)); + return; + } + + *bytes_read = input_buffer_size; +} + +static TaskHandle_t task_handle; + +static void oai_send_audio_task(void *user_data) { + while (1) { + oai_send_audio((PeerConnection *)user_data); + } +} + +void oai_platform_send_audio_task(PeerConnection *peer_connection) { + xTaskCreatePinnedToCore(oai_send_audio_task, "audio_publisher", 20000, + peer_connection, -7, &task_handle, 0); +} diff --git a/src/webrtc.cpp b/src/webrtc.cpp index 1b000c5..30344a2 100644 --- a/src/webrtc.cpp +++ b/src/webrtc.cpp @@ -1,30 +1,18 @@ -#ifndef LINUX_BUILD -#include -#include -#endif - #include #include +#include #include +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" #include "main.h" +#include "peer_connection.h" +#include "platform.h" #define TICK_INTERVAL 15 static PeerConnection *peer_connection = NULL; -#ifndef LINUX_BUILD -static StaticTask_t task_buffer; -void oai_send_audio_task(void *user_data) { - oai_init_audio_encoder(); - - while (1) { - oai_send_audio(peer_connection); - vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); - } -} -#endif - static void oai_onconnectionstatechange_task(PeerConnectionState state, void *user_data) { ESP_LOGI(LOG_TAG, "PeerConnectionState: %s", @@ -32,16 +20,10 @@ static void oai_onconnectionstatechange_task(PeerConnectionState state, if (state == PEER_CONNECTION_DISCONNECTED || state == PEER_CONNECTION_CLOSED) { -#ifndef LINUX_BUILD - esp_restart(); -#endif + oai_platform_restart(); } else if (state == PEER_CONNECTION_CONNECTED) { -#ifndef LINUX_BUILD - StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( - 20000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); - xTaskCreateStaticPinnedToCore(oai_send_audio_task, "audio_publisher", 20000, - NULL, 7, stack_memory, &task_buffer, 0); -#endif + oai_init_audio_encoder(); + oai_platform_send_audio_task(peer_connection); } } @@ -58,9 +40,7 @@ void oai_webrtc() { .video_codec = CODEC_NONE, .datachannel = DATA_CHANNEL_NONE, .onaudiotrack = [](uint8_t *data, size_t size, void *userdata) -> void { -#ifndef LINUX_BUILD oai_audio_decode(data, size); -#endif }, .onvideotrack = NULL, .on_request_keyframe = NULL, @@ -70,9 +50,7 @@ void oai_webrtc() { peer_connection = peer_connection_create(&peer_connection_config); if (peer_connection == NULL) { ESP_LOGE(LOG_TAG, "Failed to create peer connection"); -#ifndef LINUX_BUILD - esp_restart(); -#endif + oai_platform_restart(); } peer_connection_oniceconnectionstatechange(peer_connection,