Skip to content

Commit

Permalink
Updating of GoogleRtcAudioProcessing component to provide a more general behavior
Browse files Browse the repository at this point in the history

Changes in the GoogleRtcAudioProcessing component to:
--Allow the sample rate and number of channels to be explicitly specified.
--Update the memory management.
--Allow parameters to be passed.
--Allow more than mono to be processed.
--Allow parameters to be specified from Kconfig.

Signed-off-by: Per Åhgren <[email protected]>
  • Loading branch information
Per Åhgren authored and cujomalainey committed Dec 10, 2022
1 parent 6d80a55 commit e616b42
Show file tree
Hide file tree
Showing 5 changed files with 441 additions and 56 deletions.
48 changes: 48 additions & 0 deletions src/audio/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,54 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING
This component takes raw microphones input and playback reference
and outputs an echo-free microphone signal.

config COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ
depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
int "Sample rate for Google Real Time Communication Audio processing"
default 48000
help
Sets the sample rate, in Hz, used by the Google real-time
communication audio processing.

config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS
depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
int "Number of channels to process for Google Real Time Communication Audio processing"
default 1
help
Sets the number of channels to process in the Google real-time
communication audio processing.

config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS
depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
int "Number of AEC reference channels for Google Real Time Communication Audio processing"
default 2
help
Sets the number of AEC reference channels in the Google real-time
communication audio processing.

config COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES
depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
int "Memory buffer size for Google Real Time Communication Audio processing"
default 200000
help
Sets the size, in bytes, of the memory buffer for the Google real-time
communication audio processing. With a value of 0, no memory buffer
is allocated or attached.

config COMP_GOOGLE_RTC_AUDIO_PROCESSING_ECHO_PATH_DELAY_MS
depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
int "Echo path delay for Google Real Time Communication Audio processing"
default 44
help
Sets the echo path delay, in milliseconds, to use for the Google
real-time communication audio processing.

config COMP_GOOGLE_RTC_AUDIO_PROCESSING_MIC_HEADROOM_LINEAR
depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
int "Microphone headroom for Google Real Time Communication Audio processing"
default 4
help
Sets the microphone headroom for the Google real-time communication audio
processing.

config GOOGLE_RTC_AUDIO_PROCESSING_MOCK
bool "Google Real Time Communication Audio processing mock"
default n
Expand Down
222 changes: 193 additions & 29 deletions src/audio/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@

#include <google_rtc_audio_processing.h>
#include <google_rtc_audio_processing_platform.h>
#include <google_rtc_audio_processing_sof_message_reader.h>

#define GOOGLE_RTC_AUDIO_PROCESSING_SAMPLERATE 48000
#define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100

LOG_MODULE_REGISTER(google_rtc_audio_processing, CONFIG_SOF_LOG_LEVEL);

Expand All @@ -52,13 +53,15 @@ struct google_rtc_audio_processing_comp_data {
struct comp_buffer *output;
uint32_t num_frames;
int num_aec_reference_channels;
int num_capture_channels;
GoogleRtcAudioProcessingState *state;
int16_t *aec_reference_buffer;
int16_t aec_reference_frame_index;
int aec_reference_frame_index;
int16_t *raw_mic_buffer;
int16_t raw_mic_buffer_index;
int raw_mic_buffer_frame_index;
int16_t *output_buffer;
int16_t output_buffer_index;
int output_buffer_frame_index;
uint8_t *memory_buffer;
struct comp_data_blob_handler *tuning_handler;
bool reconfigure;
};
Expand Down Expand Up @@ -126,11 +129,110 @@ static int google_rtc_audio_processing_reconfigure(struct comp_dev *dev)
config, size);

cd->reconfigure = false;
ret = GoogleRtcAudioProcessingReconfigure(cd->state, config, size);
if (ret) {
comp_err(dev, "GoogleRtcAudioProcessingReconfigure failed: %d",
ret);
return -EINVAL;

uint8_t *google_rtc_audio_processing_config;
size_t google_rtc_audio_processing_config_size;
int num_capture_input_channels;
int num_capture_output_channels;
float aec_reference_delay;
float mic_gain;
bool google_rtc_audio_processing_config_present;
bool num_capture_input_channels_present;
bool num_capture_output_channels_present;
bool aec_reference_delay_present;
bool mic_gain_present;

GoogleRtcAudioProcessingParseSofConfigMessage(config, size,
&google_rtc_audio_processing_config,
&google_rtc_audio_processing_config_size,
&num_capture_input_channels,
&num_capture_output_channels,
&aec_reference_delay,
&mic_gain,
&google_rtc_audio_processing_config_present,
&num_capture_input_channels_present,
&num_capture_output_channels_present,
&aec_reference_delay_present,
&mic_gain_present);

if (google_rtc_audio_processing_config_present) {
comp_info(dev,
"google_rtc_audio_processing_reconfigure(): Applying config of size %zu bytes",
google_rtc_audio_processing_config_size);

ret = GoogleRtcAudioProcessingReconfigure(cd->state,
google_rtc_audio_processing_config,
google_rtc_audio_processing_config_size);
if (ret) {
comp_err(dev, "GoogleRtcAudioProcessingReconfigure failed: %d",
ret);
return ret;
}
}

if (num_capture_input_channels_present || num_capture_output_channels_present) {
if (num_capture_input_channels_present && num_capture_output_channels_present) {
if (num_capture_input_channels != num_capture_output_channels) {
comp_err(dev, "GoogleRtcAudioProcessingReconfigure failed: unsupported channel counts");
return -EINVAL;
}
cd->num_capture_channels = num_capture_input_channels;
} else if (num_capture_input_channels_present) {
cd->num_capture_channels = num_capture_output_channels;
} else {
cd->num_capture_channels = num_capture_output_channels;
}
comp_info(dev,
"google_rtc_audio_processing_reconfigure(): Applying num capture channels %d",
cd->num_capture_channels);


ret = GoogleRtcAudioProcessingSetStreamFormats(cd->state,
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ,
cd->num_capture_channels,
cd->num_capture_channels,
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ,
cd->num_aec_reference_channels);

if (ret) {
comp_err(dev, "GoogleRtcAudioProcessingSetStreamFormats failed: %d",
ret);
return ret;
}
}

if (aec_reference_delay_present || mic_gain_present) {
float *capture_headroom_linear_use = NULL;
float *echo_path_delay_ms_use = NULL;

if (mic_gain_present) {
capture_headroom_linear_use = &mic_gain;

/* Logging of linear headroom, using integer workaround to the broken printout of floats */
comp_info(dev,
"google_rtc_audio_processing_reconfigure(): Applying capture linear headroom: %d.%d",
(int)mic_gain, (int)(100 * mic_gain) - 100 * ((int)mic_gain));
}
if (aec_reference_delay_present) {
echo_path_delay_ms_use = &aec_reference_delay;

/* Logging of delay, using integer workaround to the broken printout of floats */
comp_info(dev,
"google_rtc_audio_processing_reconfigure(): Applying aec reference delay: %d.%d",
(int)aec_reference_delay,
(int)(100 * aec_reference_delay) -
100 * ((int)aec_reference_delay));
}

ret = GoogleRtcAudioProcessingParameters(cd->state,
capture_headroom_linear_use,
echo_path_delay_ms_use);

if (ret) {
comp_err(dev, "GoogleRtcAudioProcessingParameters failed: %d",
ret);
return ret;
}
}

return 0;
Expand Down Expand Up @@ -218,6 +320,7 @@ static struct comp_dev *google_rtc_audio_processing_create(
{
struct comp_dev *dev;
struct google_rtc_audio_processing_comp_data *cd;
int ret;

comp_cl_info(drv, "google_rtc_audio_processing_create()");

Expand All @@ -238,39 +341,71 @@ static struct comp_dev *google_rtc_audio_processing_create(
if (!cd->tuning_handler)
goto fail;

cd->state = GoogleRtcAudioProcessingCreate();
cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS;
cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS;
cd->num_frames = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ /
GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES;

if (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES > 0) {
cd->memory_buffer = rballoc(0, SOF_MEM_CAPS_RAM,
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES *
sizeof(cd->memory_buffer[0]));
if (!cd->memory_buffer) {
comp_err(dev, "google_rtc_audio_processing_create: failed to allocate memory buffer");

goto fail;
}

GoogleRtcAudioProcessingAttachMemoryBuffer(cd->memory_buffer, CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES);
}

cd->state = GoogleRtcAudioProcessingCreateWithConfig(CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ,
cd->num_capture_channels,
cd->num_capture_channels,
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ,
cd->num_aec_reference_channels,
/*config=*/NULL, /*config_size=*/0);

if (!cd->state) {
comp_err(dev, "Failed to initialized GoogleRtcAudioProcessing");
goto fail;
}
cd->num_aec_reference_channels = 2;
cd->num_frames = GOOGLE_RTC_AUDIO_PROCESSING_SAMPLERATE *
GoogleRtcAudioProcessingGetFramesizeInMs(cd->state) / 1000;

float capture_headroom_linear = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MIC_HEADROOM_LINEAR;
float echo_path_delay_ms = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_ECHO_PATH_DELAY_MS;
ret = GoogleRtcAudioProcessingParameters(cd->state,
&capture_headroom_linear,
&echo_path_delay_ms);

if (!cd->state) {
comp_err(dev, "Failed to apply GoogleRtcAudioProcessingParameters");
goto fail;
}

cd->raw_mic_buffer = rballoc(
0, SOF_MEM_CAPS_RAM,
cd->num_frames * sizeof(cd->raw_mic_buffer[0]));
cd->num_frames * cd->num_capture_channels * sizeof(cd->raw_mic_buffer[0]));
if (!cd->raw_mic_buffer)
goto fail;
bzero(cd->raw_mic_buffer, cd->num_frames * sizeof(cd->raw_mic_buffer[0]));
cd->raw_mic_buffer_index = 0;
bzero(cd->raw_mic_buffer, cd->num_frames * cd->num_capture_channels * sizeof(cd->raw_mic_buffer[0]));
cd->raw_mic_buffer_frame_index = 0;

cd->aec_reference_buffer = rballoc(
0, SOF_MEM_CAPS_RAM,
cd->num_frames * sizeof(cd->aec_reference_buffer[0]) *
cd->num_aec_reference_channels);
if (!cd->aec_reference_buffer)
goto fail;
bzero(cd->aec_reference_buffer, cd->num_frames * sizeof(cd->aec_reference_buffer[0]));
bzero(cd->aec_reference_buffer, cd->num_frames * cd->num_aec_reference_channels * sizeof(cd->aec_reference_buffer[0]));
cd->aec_reference_frame_index = 0;

cd->output_buffer = rballoc(
0, SOF_MEM_CAPS_RAM,
cd->num_frames * sizeof(cd->output_buffer[0]));
cd->num_frames * cd->num_capture_channels * sizeof(cd->output_buffer[0]));
if (!cd->output_buffer)
goto fail;
bzero(cd->output_buffer, cd->num_frames * sizeof(cd->output_buffer[0]));
cd->output_buffer_index = 0;
cd->output_buffer_frame_index = 0;

/* comp_is_new_data_blob_available always returns false for the first
* control write with non-empty config. The first non-empty write may
Expand All @@ -288,6 +423,11 @@ static struct comp_dev *google_rtc_audio_processing_create(
if (cd) {
rfree(cd->output_buffer);
rfree(cd->aec_reference_buffer);
if (cd->state) {
GoogleRtcAudioProcessingFree(cd->state);
}
GoogleRtcAudioProcessingDetachMemoryBuffer();
rfree(cd->memory_buffer);
rfree(cd->raw_mic_buffer);
comp_data_blob_handler_free(cd->tuning_handler);
rfree(cd);
Expand All @@ -306,6 +446,8 @@ static void google_rtc_audio_processing_free(struct comp_dev *dev)
cd->state = NULL;
rfree(cd->output_buffer);
rfree(cd->aec_reference_buffer);
GoogleRtcAudioProcessingDetachMemoryBuffer();
rfree(cd->memory_buffer);
rfree(cd->raw_mic_buffer);
comp_data_blob_handler_free(cd->tuning_handler);
rfree(cd);
Expand Down Expand Up @@ -362,6 +504,19 @@ static int google_rtc_audio_processing_prepare(struct comp_dev *dev)
rate = output_c->stream.rate;
buffer_release(output_c);


if (cd->num_capture_channels > cd->raw_microphone->stream.channels) {
comp_err(dev, "unsupported number of microphone channels: %d",
cd->raw_microphone->stream.channels);
return -EINVAL;
}

if (cd->num_capture_channels > cd->output->stream.channels) {
comp_err(dev, "unsupported number of output channels: %d",
cd->output->stream.channels);
return -EINVAL;
}

switch (frame_fmt) {
#if CONFIG_FORMAT_S16LE
case SOF_IPC_FRAME_S16_LE:
Expand All @@ -372,7 +527,7 @@ static int google_rtc_audio_processing_prepare(struct comp_dev *dev)
return -EINVAL;
}

if (rate != GOOGLE_RTC_AUDIO_PROCESSING_SAMPLERATE) {
if (rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) {
comp_err(dev, "unsupported samplerate: %d", rate);
return -EINVAL;
}
Expand Down Expand Up @@ -465,18 +620,27 @@ static int google_rtc_audio_processing_copy(struct comp_dev *dev)
nmax = audio_stream_frames_without_wrap(&output_buf->stream, dst);
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
cd->raw_mic_buffer[cd->raw_mic_buffer_index] = *src;
++cd->raw_mic_buffer_index;

*dst = cd->output_buffer[cd->output_buffer_index];
++cd->output_buffer_index;

if (cd->raw_mic_buffer_index == cd->num_frames) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;


memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;

if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_index = 0;
cd->raw_mic_buffer_index = 0;
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
}

src += mic_buf->stream.channels;
Expand Down
Loading

0 comments on commit e616b42

Please sign in to comment.