Skip to content

Commit

Permalink
Audio: aec: optimize acoustic echo cancellation processing
Browse files Browse the repository at this point in the history
This check-in introduces performance optimizations to the Acoustic
Echo Cancellation (AEC) implementation. The changes primarily
refine loop structures and memory-copy operations so that fewer
processor cycles are spent per processing period.

Signed-off-by: shastry <[email protected]>
  • Loading branch information
ShriramShastry committed Feb 26, 2024
1 parent 3681e09 commit 6ec3be3
Show file tree
Hide file tree
Showing 2 changed files with 234 additions and 87 deletions.
235 changes: 167 additions & 68 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@
/* NOTE(review): presumably the divisor that turns a sample rate into the
 * number of frames per processing period - confirm against the users of
 * this macro. The spelling "FREQENCY" is kept because the name may be
 * referenced elsewhere.
 */
#define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100
/* Number of input and output pins of the module */
#define GOOGLE_RTC_NUM_INPUT_PINS 2
#define GOOGLE_RTC_NUM_OUTPUT_PINS 1

/* Negative status codes returned by the processing routine.
 * The values are parenthesized so that each macro expands to a single
 * primary expression regardless of the surrounding operators.
 */
/* AEC reference read pointer is NULL or lies outside the reference buffer */
#define ERR_INVALID_REF (-1)
/* Copy of the AEC reference data could not be performed */
#define ERR_MEMCPY_FAIL (-2)
/* Capture source pointer (or its staging buffer) is NULL or out of range */
#define ERR_INVALID_SRC (-3)
/* Output pointer (or the processed-data buffer) is NULL or out of range */
#define ERR_INVALID_DST (-4)


LOG_MODULE_REGISTER(google_rtc_audio_processing, CONFIG_SOF_LOG_LEVEL);

Expand Down Expand Up @@ -791,7 +796,6 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
size_t dst_buf_size;

size_t num_of_bytes_to_process;
size_t channel;
size_t buffer_offset;

struct sof_source *ref_stream, *src_stream;
Expand Down Expand Up @@ -822,23 +826,60 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
* 16int: linearize buffer, skip channels if > Max
*/
/* Reduce cycle waste by streamlining the inner loop,
* converting from array indexing to pointer arithmetic,
* and putting data copy verification outside the loop.
*/
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->aec_reference_buffer_ptrs[channel][i] =
convert_int16_to_float(ref[channel]);
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
const int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;

if ((void *)ref_end >= (void *)ref_buf_end)
ref_end = (void *)ref_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
float **ref_ptr = cd->aec_reference_buffer_ptrs;
int s_chan;
int i;

/* Loop over frames and channels, converting data from int16 to float */
for (i = 0; i < cd->num_frames; ++i) {
for (s_chan = 0; s_chan < cd->num_aec_reference_channels; ++s_chan) {
/* Check that ref is within the valid range of the ref_buf buffer */
if (ref && (void *)ref >= (void *)ref_buf_start &&
(void *)ref < (void *)ref_buf_end)
(*ref_ptr)[s_chan] = convert_int16_to_float(*ref++);
else
/* ref does not point to valid int16_t data */
return ERR_INVALID_REF;
}
ref_ptr++;
}

ref += cd->num_aec_reference_channels;
if ((void *)ref >= (void *)ref_buf_end)
ref = (void *)ref_buf_start;
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *ref_buf = cd->aec_reference_buffer;

/* Check that ref is within the valid range of the ref_buf buffer */
if (ref && (void *)ref >= (void *)ref_buf_start && (void *)ref < (void *)ref_buf_end) {
/* Use memcpy_s to copy the data from ref buffer to ref_buf buffer until it reaches
* ref_end
* This assumes that the data in the ref buffer is contiguous
*/
size_t num_bytes = (ref_end - ref) * sizeof(*ref);

if (memcpy_s(ref_buf, num_bytes, ref, num_bytes) != 0) {
/* Handle error */
return -2;
}
/* Update the ref and ref_buf pointers */
ref = ref_end;
ref_buf += (ref_end - ref);
} else {
/* ref does not point to valid int16_t data */
return ERR_MEMCPY_FAIL;
}

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
(const float **)
Expand All @@ -855,26 +896,57 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
(const void **)&src_buf_start, &src_buf_size);
assert(!ret);
src_buf_end = src_buf_start + src_buf_size;

/* The second optimization eliminates the inner loop
* and replaces it with pointer arithmetic for speedier access.
* To reduce cycle waste, the data copy check is moved outside of the loop.
*/
/* Initialize error_code to 0 (no error) */
int error_code = 0;
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_capture_channels; channel++)
const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;

if ((void *)src_end >= (void *)src_buf_end)
src_end = (void *)src_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
float **proc_ptr = cd->process_buffer_ptrs;

/* Check for null pointers and buffer overflows */
if (!src || !proc_ptr || src >= src_end)
/* If there's an error, set error_code to ERR_INVALID_SRC but don't return yet */
error_code = ERR_INVALID_SRC;
else
/* If there's no error, continue processing */
while (src != src_end) {
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;

*proc_ptr++ = convert_int16_to_float(src++);
}

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->process_buffer[buffer_offset++] = src[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *proc_buf = cd->process_buffer;

/* Check for null pointers and buffer overflows */
if (!src || !proc_buf || src >= src_end)
/* If there's an error, set error_code to ERR_INVALID_SRC but don't return yet */
error_code = ERR_INVALID_SRC;
else
/* If there's no error, continue processing */
while (src != src_end) {
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;

*proc_buf++ = *src++;
}

/* move pointer to next frame
* number of incoming channels may be < cd->num_capture_channels
*/
src += cd->config.output_fmt.channels_count;
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;
}
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

source_release_data(src_stream, num_of_bytes_to_process);

/* Return the error code. If there was no error, this will be 0 */
return error_code;

/* call the library, use same in/out buffers */
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
GoogleRtcAudioProcessingProcessCapture_float32(cd->state,
Expand All @@ -894,24 +966,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,

/* process all channels in output stream */
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
/* set data in processed channels, zeroize not processed */
if (channel < cd->num_capture_channels)
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;

if ((void *)dst_end >= (void *)dst_buf_end)
dst_end = (void *)dst_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
dst[channel] = convert_float_to_int16(
cd->process_buffer_ptrs[channel][i]);
float **proc_ptr = cd->process_buffer_ptrs;

/* Check for null pointers and buffer overflows */
if (!dst || !proc_ptr || dst >= dst_end || *proc_ptr >= *proc_ptr + cd->num_frames)
return ERR_INVALID_DST;

while (dst != dst_end)
*dst++ = convert_float_to_int16(*proc_ptr++);

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
dst[channel] = cd->process_buffer[buffer_offset++];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
else
dst[channel] = 0;
}
int16_t *process_buffer = cd->process_buffer;

dst += cd->config.output_fmt.channels_count;
if ((void *)dst >= (void *)dst_buf_end)
dst = (void *)dst_buf_start;
}
/* Check for null pointers and buffer overflows */
if (!dst || !process_buffer || dst >= dst_end ||
process_buffer >= process_buffer + cd->num_frames)
return ERR_INVALID_DST;

while (dst != dst_end)
*dst++ = *process_buffer++;

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

sink_commit_buffer(dst_stream, num_of_bytes_to_process);

Expand All @@ -928,6 +1009,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
int16_t *src, *dst, *ref;
uint32_t num_aec_reference_frames;
uint32_t num_aec_reference_bytes;
int ref_channels;
int aec_ref_product;
int num_samples_remaining;
int num_frames_remaining;
int channel;
Expand All @@ -950,25 +1033,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
ref_stream = ref_streamb->data;
ref = audio_stream_get_rptr(ref_stream);

/* Pre-calculate the number of channels in the reference stream for efficiency */
ref_channels = audio_stream_get_channels(ref_stream);

/* Pre-calculate the product of the number of AEC reference channels and the AEC
* reference frame index
*/
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;

num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;

num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
num_samples_remaining = num_aec_reference_frames * ref_channels;
while (num_samples_remaining) {
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
n = MIN(num_samples_remaining, nmax);
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
j = aec_ref_product;
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
cd->aec_reference_buffer[j++] = ref[channel];

ref += audio_stream_get_channels(ref_stream);
ref += ref_channels;
++cd->aec_reference_frame_index;

if (cd->aec_reference_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
cd->aec_reference_buffer);
cd->aec_reference_buffer);
cd->aec_reference_frame_index = 0;
/* Reset the product as the frame index is reset */
aec_ref_product = 0;
}
}
num_samples_remaining -= n;
Expand All @@ -984,6 +1075,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
src = audio_stream_get_rptr(mic_stream);
dst = audio_stream_get_wptr(out_stream);

/* Move out of loop */
int mic_stream_channels = audio_stream_get_channels(mic_stream);
frames = input_buffers[cd->raw_microphone_source].size;
num_frames_remaining = frames;

Expand All @@ -993,34 +1086,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
nmax = audio_stream_frames_without_wrap(out_stream, dst);
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;

memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;

if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
/* If we haven't filled the buffer yet, copy the data */
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;
}

if (cd->output_buffer_frame_index < cd->num_frames) {
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;
}

src += audio_stream_get_channels(mic_stream);
dst += audio_stream_get_channels(out_stream);
src += mic_stream_channels;
dst += mic_stream_channels;
}
num_frames_remaining -= n;
src = audio_stream_wrap(mic_stream, src);
dst = audio_stream_wrap(out_stream, dst);

/* If we've filled the buffer, process the data */
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
}
}

module_update_buffer_position(&input_buffers[cd->raw_microphone_source],
Expand Down
Loading

0 comments on commit 6ec3be3

Please sign in to comment.