Skip to content

Commit

Permalink
Audio: aec: optimize acoustic echo cancellation processing
Browse files Browse the repository at this point in the history
This commit optimizes the performance of the Acoustic Echo
Cancellation (AEC) implementation. The changes focus on
restructuring loops and memory copy operations to reduce
cycle usage.

Signed-off-by: shastry <[email protected]>
  • Loading branch information
ShriramShastry committed Feb 26, 2024
1 parent 3681e09 commit 943810a
Show file tree
Hide file tree
Showing 2 changed files with 211 additions and 83 deletions.
208 changes: 144 additions & 64 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
struct sof_sink **sinks, int num_of_sinks)
{
int ret;
int i;
int16_t const *src;
int8_t const *src_buf_start;
int8_t const *src_buf_end;
Expand All @@ -791,8 +792,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
size_t dst_buf_size;

size_t num_of_bytes_to_process;
size_t channel;
size_t buffer_offset;
int channel;

struct sof_source *ref_stream, *src_stream;
struct sof_sink *dst_stream;
Expand Down Expand Up @@ -822,23 +823,58 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
* 16int: linearize buffer, skip channels if > Max
*/
/* Reduce cycle waste by streamlining the inner loop,
* converting from array indexing to pointer arithmetic,
* and putting data copy verification outside the loop.
*/
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
const int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;

if ((void *)ref_end >= (void *)ref_buf_end)
ref_end = (void *)ref_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->aec_reference_buffer_ptrs[channel][i] =
convert_int16_to_float(ref[channel]);
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
float **ref_ptr = cd->aec_reference_buffer_ptrs;

/* Loop over frames and channels, converting data from int16 to float */
for (i = 0; i < cd->num_frames; ++i) {
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
/* Check that ref is within the valid range of the ref_buf buffer */
if (ref && (void *)ref >= (void *)ref_buf_start &&
(void *)ref < (void *)ref_buf_end)
(*ref_ptr)[channel] = convert_int16_to_float(*ref++);
else
/* ref does not point to valid int16_t data */
return -1;
}
ref_ptr++;
}

ref += cd->num_aec_reference_channels;
if ((void *)ref >= (void *)ref_buf_end)
ref = (void *)ref_buf_start;
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *ref_buf = cd->aec_reference_buffer;

/* Check that ref is within the valid range of the ref_buf buffer */
if (ref && (void *)ref >= (void *)ref_buf_start && (void *)ref < (void *)ref_buf_end) {
/* Use memcpy_s to copy the data from ref buffer to ref_buf buffer until it reaches
* ref_end
* This assumes that the data in the ref buffer is contiguous
*/
size_t num_bytes = (ref_end - ref) * sizeof(*ref);

if (memcpy_s(ref_buf, num_bytes, ref, num_bytes) != 0) {
/* Handle error */
return -2;
}
/* Update the ref and ref_buf pointers */
ref = ref_end;
ref_buf += (ref_end - ref);
} else {
/* ref does not point to valid int16_t data */
return -2;
}

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
(const float **)
Expand All @@ -855,24 +891,50 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
(const void **)&src_buf_start, &src_buf_size);
assert(!ret);
src_buf_end = src_buf_start + src_buf_size;

/* The second optimization eliminates the inner loop
* and replaces it with pointer arithmetic for speedier access.
* To reduce cycle waste, the data copy check is moved outside of the loop.
*/
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_capture_channels; channel++)
const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;

if ((void *)src_end >= (void *)src_buf_end)
src_end = (void *)src_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
float **proc_ptr = cd->process_buffer_ptrs;

/* Process the data until the source pointer reaches the end
* This assumes that the source buffer is continuous in memory
* If the source buffer is not continuous (i.e., if it wraps around
* like in a circular buffer), this code will not work correctly
*/
while (src != src_end) {
/* Check if src has exceeded the buffer end */
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;

*proc_ptr++ = convert_int16_to_float(src++);
}

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->process_buffer[buffer_offset++] = src[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *proc_buf = cd->process_buffer;

/* move pointer to next frame
* number of incoming channels may be < cd->num_capture_channels
*/
src += cd->config.output_fmt.channels_count;
/* Process the data until the source pointer reaches the end
* This assumes that the source buffer is continuous in memory
* If the source buffer is not continuous (i.e., if it wraps
* around like in a circular buffer), this code will not work correctly
*/
while (src != src_end) {
/* Check if src has exceeded the buffer end */
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;

*proc_buf++ = *src++;
}

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

source_release_data(src_stream, num_of_bytes_to_process);

/* call the library, use same in/out buffers */
Expand All @@ -894,24 +956,24 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,

/* process all channels in output stream */
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
/* set data in processed channels, zeroize not processed */
if (channel < cd->num_capture_channels)
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;

if ((void *)dst_end >= (void *)dst_buf_end)
dst_end = (void *)dst_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
dst[channel] = convert_float_to_int16(
cd->process_buffer_ptrs[channel][i]);
float **proc_ptr = cd->process_buffer_ptrs;

while (dst != dst_end && *proc_ptr)
*dst++ = convert_float_to_int16(*proc_ptr++);

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
dst[channel] = cd->process_buffer[buffer_offset++];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
else
dst[channel] = 0;
}
int16_t *process_buffer = cd->process_buffer;

dst += cd->config.output_fmt.channels_count;
if ((void *)dst >= (void *)dst_buf_end)
dst = (void *)dst_buf_start;
}
while (dst != dst_end && *process_buffer)
*dst++ = *process_buffer++;

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

sink_commit_buffer(dst_stream, num_of_bytes_to_process);

Expand All @@ -928,6 +990,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
int16_t *src, *dst, *ref;
uint32_t num_aec_reference_frames;
uint32_t num_aec_reference_bytes;
int ref_channels;
int aec_ref_product;
int num_samples_remaining;
int num_frames_remaining;
int channel;
Expand All @@ -950,25 +1014,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
ref_stream = ref_streamb->data;
ref = audio_stream_get_rptr(ref_stream);

/* Pre-calculate the number of channels in the reference stream for efficiency */
ref_channels = audio_stream_get_channels(ref_stream);

/* Pre-calculate the product of the number of AEC reference channels and the AEC
* reference frame index
*/
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;

num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;

num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
num_samples_remaining = num_aec_reference_frames * ref_channels;
while (num_samples_remaining) {
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
n = MIN(num_samples_remaining, nmax);
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
j = aec_ref_product;
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
cd->aec_reference_buffer[j++] = ref[channel];

ref += audio_stream_get_channels(ref_stream);
ref += ref_channels;
++cd->aec_reference_frame_index;

if (cd->aec_reference_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
cd->aec_reference_buffer);
cd->aec_reference_buffer);
cd->aec_reference_frame_index = 0;
/* Reset the product as the frame index is reset */
aec_ref_product = 0;
}
}
num_samples_remaining -= n;
Expand All @@ -984,6 +1056,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
src = audio_stream_get_rptr(mic_stream);
dst = audio_stream_get_wptr(out_stream);

/* Move out of loop */
int mic_stream_channels = audio_stream_get_channels(mic_stream);
frames = input_buffers[cd->raw_microphone_source].size;
num_frames_remaining = frames;

Expand All @@ -993,34 +1067,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
nmax = audio_stream_frames_without_wrap(out_stream, dst);
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;

memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;

if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
/* If we haven't filled the buffer yet, copy the data */
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;
}

if (cd->output_buffer_frame_index < cd->num_frames) {
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;
}

src += audio_stream_get_channels(mic_stream);
dst += audio_stream_get_channels(out_stream);
src += mic_stream_channels;
dst += mic_stream_channels;
}
num_frames_remaining -= n;
src = audio_stream_wrap(mic_stream, src);
dst = audio_stream_wrap(out_stream, dst);

/* If we've filled the buffer, process the data */
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
}
}

module_update_buffer_position(&input_buffers[cd->raw_microphone_source],
Expand Down
Loading

0 comments on commit 943810a

Please sign in to comment.