From bd12891382fd84cd9bc39ed175fef2dc7f08e01f Mon Sep 17 00:00:00 2001
From: Andy Ross <andyross@google.com>
Date: Tue, 12 Dec 2023 10:31:47 -0800
Subject: [PATCH] google_rtc_audio_processing: Major rework

Lots of work on AEC, with an eye to getting it working on main, where
it had bitrotten, and pulling in various features (IPC3 pipeline state
management and DP scheduling) that had merged in other branches.

Dynamically configure stream formats (sample format/rate and channel
count) from the connected streams at prepare() time instead of relying
on build-time tuning.

Port the code to use the source/sink API, in as sophisticated a manner
as I can find.  Copies unroll cleanly into just a few instructions per
sample, including integer/float conversions and de-/interleaving.

Support both 16 and 32 bit sample formats, with a fairly clever
inlining scheme to share as much code as possible between them.  The
component will select "copy" function pointers at prepare() time.
This works by using the _float32 variant of the AEC API (which is
actually the core internal implementation) instead of the _int16 one
which involves a conversion.

The large buffers required by this component (input and output staging
and an internally-managed pool/heap block) are now static symbols
instead of dynamic memory from the heap.  These are very large, taking
up about half of what is available to the linker on MTL.  Relying on
heap allocation is just dangerous in this context.

This fully decouples AEC from the playback stream.  It will run
without an active reference happily, feeding zeros to the processing,
and pick up in stride when the pipeline starts.  This requires adding
a trigger handler for pipeline control in IPC3, which will propagate
certain triggers across pipeline boundaries when shutting down
playback streams, breaking active capture.  Note that this feature is
unexercisable on IPC4, where the kernel automatically starts up
connected pipelines.

Fixes a few bugs and misfeatures also:

+ Chunk the copies by full buffer strides between AEC processing calls
  instead of testing at each copied frame.

+ Copy the reference and mic streams in tandem, preventing them from
  becoming out of sync if the devices weren't themselves synchronized.

+ Copy the AEC results to the output stream after the call to
  ProcessCapture() instead of before.  This was a hidden latency bug
  in the original code, I think.

+ Cleans up the Kconfig to remove stale variables and guard all
  the component-specific tunables under the top-level component
  variable.  Also uses a default instead of a select to couple to
  CONFIG_STUBS, allowing AEC to be manually tuned.

Signed-off-by: Andy Ross <andyross@google.com>
---
 src/audio/google/Kconfig                      |  27 +-
 .../google/google_rtc_audio_processing.c      | 606 ++++++++++--------
 .../google/google_rtc_audio_processing_mock.c |  60 +-
 src/include/ipc4/aec.h                        |   2 -
 4 files changed, 386 insertions(+), 309 deletions(-)

diff --git a/src/audio/google/Kconfig b/src/audio/google/Kconfig
index 027e15a2c229..ac323716a7f8 100644
--- a/src/audio/google/Kconfig
+++ b/src/audio/google/Kconfig
@@ -15,7 +15,6 @@ config COMP_GOOGLE_HOTWORD_DETECT
 config COMP_GOOGLE_RTC_AUDIO_PROCESSING
 	bool "Google Real Time Communication Audio processing"
 	select COMP_BLOB
-	select GOOGLE_RTC_AUDIO_PROCESSING_MOCK if COMP_STUBS
 	default n
 	help
 	  Select for Google real-time communication audio processing. It
@@ -24,6 +23,8 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING
 	  This component takes raw microphones input and playback reference
 	  and outputs an echo-free microphone signal.
 
+if COMP_GOOGLE_RTC_AUDIO_PROCESSING
+
 config COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ
 	depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
 	int "Sample rate for Google Real Time Communication Audio processing"
@@ -32,21 +33,15 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ
 	  Sets the sample rate for the memory buffer for the Google real-time
 	  communication audio processing.
 
-config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS
-	depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
-	int "Number of channels to process for Google Real Time Communication Audio processing"
-	default 1
-	help
-	  Sets the number of channels to process in the Google real-time
-	  communication audio processing.
-
-config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS
-	depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
-	int "Number of AEC reference channels for Google Real Time Communication Audio processing"
+config COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX
+	int "Max number of AEC channels"
 	default 2
 	help
-	  Sets the number AEC reference channels in the Google real-time
-	  communication audio processing.
+	  Sets the maximum number source/sink channels Google Real
+	  Time Communication Audio Processing will use for.  This is a
+	  computation and memory budget tunable.  Channel counts are
+	  retrieved at runtime, but channels higher than this number
+	  are ignored (on input) or cleared (output).
 
 config COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES
 	depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
@@ -74,10 +69,12 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING_MIC_HEADROOM_LINEAR
 
 config GOOGLE_RTC_AUDIO_PROCESSING_MOCK
 	bool "Google Real Time Communication Audio processing mock"
-	default n
+	default y if COMP_STUBS
 	depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING
 	help
 	  Mock Google real-time communication audio processing.
 	  It allows for compilation check and basic audio flow checking.
 
+endif # COMP_GOOGLE_RTC_AUDIO_PROCESSING
+
 endmenu
diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c
index 6c59c60aa632..ca9d3d434364 100644
--- a/src/audio/google/google_rtc_audio_processing.c
+++ b/src/audio/google/google_rtc_audio_processing.c
@@ -52,25 +52,51 @@ DECLARE_SOF_RT_UUID("google-rtc-audio-processing", google_rtc_audio_processing_u
 DECLARE_TR_CTX(google_rtc_audio_processing_tr, SOF_UUID(google_rtc_audio_processing_uuid),
 			   LOG_LEVEL_INFO);
 
-struct google_rtc_audio_processing_comp_data {
-#if CONFIG_IPC_MAJOR_4
-	struct sof_ipc4_aec_config config;
+#if !(defined(__ZEPHYR__) && defined(CONFIG_XTENSA))
+/* Zephyr provides uncached memory for static variables on SMP, but we
+ * are single-core component and know we can safely use the cache for
+ * AEC work.  XTOS SOF is cached by default, so stub the Zephyr API.
+ */
+#define arch_xtensa_cached_ptr(p) (p)
 #endif
+
+#ifndef __ZEPHYR__
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#endif
+
+static __aligned(PLATFORM_DCACHE_ALIGN)
+uint8_t aec_mem_blob[CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES];
+
+#define NUM_FRAMES (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ \
+		    / GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES)
+#define CHAN_MAX CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX
+
+static __aligned(PLATFORM_DCACHE_ALIGN)
+float refoutbuf[CHAN_MAX][NUM_FRAMES];
+
+static __aligned(PLATFORM_DCACHE_ALIGN)
+float micbuf[CHAN_MAX][NUM_FRAMES];
+
+struct google_rtc_audio_processing_comp_data {
 	uint32_t num_frames;
 	int num_aec_reference_channels;
 	int num_capture_channels;
 	GoogleRtcAudioProcessingState *state;
-	int16_t *aec_reference_buffer;
-	int aec_reference_frame_index;
-	int16_t *raw_mic_buffer;
-	int raw_mic_buffer_frame_index;
-	int16_t *output_buffer;
-	int output_buffer_frame_index;
-	uint8_t *memory_buffer;
+	float *raw_mic_buffers[CHAN_MAX];
+	float *refout_buffers[CHAN_MAX];
+	int buffered_frames;
 	struct comp_data_blob_handler *tuning_handler;
+	bool active;
 	bool reconfigure;
+	bool last_ref_ok;
 	int aec_reference_source;
 	int raw_microphone_source;
+	struct comp_buffer *ref_comp_buffer;
+	int ref_framesz;
+	int cap_framesz;
+	void (*mic_copy)(struct sof_source *src, int frames, float **dst_bufs, int frame0);
+	void (*ref_copy)(struct sof_source *src, int frames, float **dst_bufs, int frame0);
+	void (*out_copy)(struct sof_sink *dst, int frames, float **src_bufs);
 };
 
 void *GoogleRtcMalloc(size_t size)
@@ -83,30 +109,129 @@ void GoogleRtcFree(void *ptr)
 	return rfree(ptr);
 }
 
-#if CONFIG_IPC_MAJOR_4
-static void google_rtc_audio_processing_params(struct processing_module *mod)
+static ALWAYS_INLINE float clamp_rescale(float max_val, float x)
 {
-	struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
-	struct sof_ipc_stream_params *params = mod->stream_params;
-	struct comp_buffer *sinkb, *sourceb;
-	struct list_item *source_list;
-	struct comp_dev *dev = mod->dev;
+	float min = -1.0f;
+	float max = 1.0f - 1.0f / max_val;
+
+	return max_val * (x < min ? min : (x > max ? max : x));
+}
 
-	ipc4_base_module_cfg_to_stream_params(&mod->priv.cfg.base_cfg, params);
-	component_set_nearest_period_frames(dev, params->rate);
+static ALWAYS_INLINE float s16_to_float(const char *ptr)
+{
+	float scale = -(float)SHRT_MIN;
+	float x = *(int16_t *)ptr;
+
+	return (1.0f / scale) * x;
+}
 
-	list_for_item(source_list, &dev->bsource_list) {
-		sourceb = container_of(source_list, struct comp_buffer, sink_list);
-		if (IPC4_SINK_QUEUE_ID(buf_get_id(sourceb)) == SOF_AEC_FEEDBACK_QUEUE_ID)
-			ipc4_update_buffer_format(sourceb, &cd->config.reference_fmt);
-		else
-			ipc4_update_buffer_format(sourceb, &mod->priv.cfg.base_cfg.audio_fmt);
+static ALWAYS_INLINE void float_to_s16(float x, char *dst)
+{
+	*(int16_t *)dst = (int16_t)clamp_rescale(-(float)SHRT_MIN, x);
+}
+
+static ALWAYS_INLINE float s32_to_float(const char *ptr)
+{
+	float scale = -(float)INT_MIN;
+	float x = *(int32_t *)ptr;
+
+	return (1.0f / scale) * x;
+}
+
+static ALWAYS_INLINE void float_to_s32(float x, char *dst)
+{
+	*(int32_t *)dst = (int16_t)clamp_rescale(-(float)INT_MIN, x);
+}
+
+static ALWAYS_INLINE void source_to_float(struct sof_source *src, float **dst_bufs,
+					  float (*cvt_fn)(const char *),
+					  int sample_sz, int frame0, int frames)
+{
+	size_t chan = source_get_channels(src);
+	size_t bytes = frames * chan * sample_sz;
+	int i, c, err, ndst = MIN(chan, CHAN_MAX);
+	const char *buf, *bufstart, *bufend;
+	float *dst[CHAN_MAX];
+	size_t bufsz;
+
+	for (i = 0; i < ndst; i++)
+		dst[i] = &dst_bufs[i][frame0];
+
+	err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
+	assert(err == 0);
+	bufend = &bufstart[bufsz];
+
+	while (frames) {
+		size_t n = MIN(frames, (bufsz - (buf - bufstart)) / (chan * sample_sz));
+
+		for (i = 0; i < n; i++) {
+			for  (c = 0; c < ndst; c++) {
+				*dst[c]++ = cvt_fn(buf);
+				buf += sample_sz;
+			}
+			buf += sample_sz * (chan - ndst); /* skip unused channels */
+		}
+		frames -= n;
+		if (buf >= bufend)
+			buf = bufstart;
 	}
+	source_release_data(src, bytes);
+}
+
+static ALWAYS_INLINE void float_to_sink(struct sof_sink *dst, float **src_bufs,
+					void (*cvt_fn)(float, char *),
+					int sample_sz, int frames)
+{
+	size_t chan = sink_get_channels(dst);
+	size_t bytes = frames * chan * sample_sz;
+	int i, c, err, nsrc = MIN(chan, CHAN_MAX);
+	char *buf, *bufstart, *bufend;
+	float *src[CHAN_MAX];
+	size_t bufsz;
 
-	sinkb = list_first_item(&dev->bsink_list, struct comp_buffer, source_list);
-	ipc4_update_buffer_format(sinkb, &mod->priv.cfg.base_cfg.audio_fmt);
+	for (i = 0; i < nsrc; i++)
+		src[i] = &src_bufs[i][0];
+
+	err = sink_get_buffer(dst, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
+	assert(err == 0);
+	bufend = &bufstart[bufsz];
+
+	while (frames) {
+		size_t n = MIN(frames, (bufsz - (buf - bufstart)) / (chan * sample_sz));
+
+		for (i = 0; i < n; i++) {
+			for  (c = 0; c < nsrc; c++) {
+				cvt_fn(*src[c]++, buf);
+				buf += sample_sz;
+			}
+			buf += sample_sz * (chan - nsrc); /* skip unused channels */
+		}
+		frames -= n;
+		if (buf >= bufend)
+			buf = bufstart;
+	}
+	sink_commit_buffer(dst, bytes);
+}
+
+static void source_copy16(struct sof_source *src, int frames, float **dst_bufs, int frame0)
+{
+	source_to_float(src, dst_bufs, s16_to_float, sizeof(int16_t), frame0, frames);
+}
+
+static void source_copy32(struct sof_source *src, int frames, float **dst_bufs, int frame0)
+{
+	source_to_float(src, dst_bufs, s32_to_float, sizeof(int32_t), frame0, frames);
+}
+
+static void sink_copy16(struct sof_sink *dst, int frames, float **src_bufs)
+{
+	float_to_sink(dst, src_bufs, float_to_s16, sizeof(int16_t), frames);
+}
+
+static void sink_copy32(struct sof_sink *dst, int frames, float **src_bufs)
+{
+	float_to_sink(dst, src_bufs, float_to_s32, sizeof(int32_t), frames);
 }
-#endif
 
 static int google_rtc_audio_processing_reconfigure(struct processing_module *mod)
 {
@@ -376,7 +501,7 @@ static int google_rtc_audio_processing_init(struct processing_module *mod)
 	struct module_data *md = &mod->priv;
 	struct comp_dev *dev = mod->dev;
 	struct google_rtc_audio_processing_comp_data *cd;
-	int ret;
+	int ret, i;
 
 	comp_info(dev, "google_rtc_audio_processing_init()");
 
@@ -389,46 +514,19 @@ static int google_rtc_audio_processing_init(struct processing_module *mod)
 
 	md->private = cd;
 
-#if CONFIG_IPC_MAJOR_4
-	const struct ipc4_base_module_extended_cfg *base_cfg = md->cfg.init_data;
-	struct ipc4_input_pin_format reference_fmt, output_fmt;
-	const size_t size = sizeof(struct ipc4_input_pin_format);
-
-	cd->config.base_cfg = base_cfg->base_cfg;
-
-	/* Copy the reference format from input pin 1 format */
-	memcpy_s(&reference_fmt, size,
-		 &base_cfg->base_cfg_ext.pin_formats[size], size);
-	memcpy_s(&output_fmt, size,
-		 &base_cfg->base_cfg_ext.pin_formats[size * GOOGLE_RTC_NUM_INPUT_PINS], size);
-
-	cd->config.reference_fmt = reference_fmt.audio_fmt;
-	cd->config.output_fmt = output_fmt.audio_fmt;
-#endif
-
 	cd->tuning_handler = comp_data_blob_handler_new(dev);
 	if (!cd->tuning_handler) {
 		ret = -ENOMEM;
 		goto fail;
 	}
 
-	cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS;
-	cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS;
-	cd->num_frames = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ /
-		GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES;
-
-	if (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES > 0) {
-		cd->memory_buffer = rballoc(0, SOF_MEM_CAPS_RAM,
-					    CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES *
-					    sizeof(cd->memory_buffer[0]));
-		if (!cd->memory_buffer) {
-			comp_err(dev, "google_rtc_audio_processing_init: failed to allocate memory buffer");
-			ret = -ENOMEM;
-			goto fail;
-		}
+	cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX;
+	cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX;
+	cd->num_frames = NUM_FRAMES;
 
-		GoogleRtcAudioProcessingAttachMemoryBuffer(cd->memory_buffer, CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES);
-	}
+	/* Giant blob of scratch memory. */
+	GoogleRtcAudioProcessingAttachMemoryBuffer(arch_xtensa_cached_ptr(&aec_mem_blob[0]),
+						   sizeof(aec_mem_blob));
 
 	cd->state = GoogleRtcAudioProcessingCreateWithConfig(CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ,
 							     cd->num_capture_channels,
@@ -454,36 +552,12 @@ static int google_rtc_audio_processing_init(struct processing_module *mod)
 		goto fail;
 	}
 
-	cd->raw_mic_buffer = rballoc(
-		0, SOF_MEM_CAPS_RAM,
-		cd->num_frames * cd->num_capture_channels * sizeof(cd->raw_mic_buffer[0]));
-	if (!cd->raw_mic_buffer) {
-		ret = -EINVAL;
-		goto fail;
-	}
-	bzero(cd->raw_mic_buffer, cd->num_frames * cd->num_capture_channels * sizeof(cd->raw_mic_buffer[0]));
-	cd->raw_mic_buffer_frame_index = 0;
-
-	cd->aec_reference_buffer = rballoc(
-		0, SOF_MEM_CAPS_RAM,
-		cd->num_frames * sizeof(cd->aec_reference_buffer[0]) *
-		cd->num_aec_reference_channels);
-	if (!cd->aec_reference_buffer) {
-		ret = -ENOMEM;
-		goto fail;
+	for (i = 0; i < CHAN_MAX; i++) {
+		cd->raw_mic_buffers[i] = arch_xtensa_cached_ptr(&micbuf[i][0]);
+		cd->refout_buffers[i] = arch_xtensa_cached_ptr(&refoutbuf[i][0]);
 	}
-	bzero(cd->aec_reference_buffer, cd->num_frames * cd->num_aec_reference_channels * sizeof(cd->aec_reference_buffer[0]));
-	cd->aec_reference_frame_index = 0;
 
-	cd->output_buffer = rballoc(
-		0, SOF_MEM_CAPS_RAM,
-		cd->num_frames * cd->num_capture_channels * sizeof(cd->output_buffer[0]));
-	if (!cd->output_buffer) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-	bzero(cd->output_buffer, cd->num_frames * sizeof(cd->output_buffer[0]));
-	cd->output_buffer_frame_index = 0;
+	cd->buffered_frames = 0;
 
 	/* comp_is_new_data_blob_available always returns false for the first
 	 * control write with non-empty config. The first non-empty write may
@@ -501,14 +575,10 @@ static int google_rtc_audio_processing_init(struct processing_module *mod)
 fail:
 	comp_err(dev, "google_rtc_audio_processing_init(): Failed");
 	if (cd) {
-		rfree(cd->output_buffer);
-		rfree(cd->aec_reference_buffer);
 		if (cd->state) {
 			GoogleRtcAudioProcessingFree(cd->state);
 		}
 		GoogleRtcAudioProcessingDetachMemoryBuffer();
-		rfree(cd->memory_buffer);
-		rfree(cd->raw_mic_buffer);
 		comp_data_blob_handler_free(cd->tuning_handler);
 		rfree(cd);
 	}
@@ -524,16 +594,17 @@ static int google_rtc_audio_processing_free(struct processing_module *mod)
 
 	GoogleRtcAudioProcessingFree(cd->state);
 	cd->state = NULL;
-	rfree(cd->output_buffer);
-	rfree(cd->aec_reference_buffer);
 	GoogleRtcAudioProcessingDetachMemoryBuffer();
-	rfree(cd->memory_buffer);
-	rfree(cd->raw_mic_buffer);
 	comp_data_blob_handler_free(cd->tuning_handler);
 	rfree(cd);
 	return 0;
 }
 
+static bool is_ref_buffer(struct comp_dev *dev, struct comp_buffer *b)
+{
+	return b->source->pipeline->pipeline_id != dev->pipeline->pipeline_id;
+}
+
 static int google_rtc_audio_processing_prepare(struct processing_module *mod,
 					       struct sof_source **sources,
 					       int num_of_sources,
@@ -543,220 +614,239 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
 	struct comp_dev *dev = mod->dev;
 	struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
 	struct list_item *source_buffer_list_item;
-	struct comp_buffer *output;
-	unsigned int aec_channels = 0, frame_fmt, rate;
-	int microphone_stream_channels = 0;
-	int output_stream_channels;
-	int ret;
-	int i = 0;
+	int ret = 0, i = 0;
 
 	comp_info(dev, "google_rtc_audio_processing_prepare()");
 
-#if CONFIG_IPC_MAJOR_4
-	google_rtc_audio_processing_params(mod);
-#endif
+	if (num_of_sources != 2 || num_of_sinks != 1) {
+		comp_err(dev, "Invalid source/sink count");
+		return -EINVAL;
+	}
 
-	/* searching for stream and feedback source buffers */
-	list_for_item(source_buffer_list_item, &dev->bsource_list) {
-		struct comp_buffer *source = container_of(source_buffer_list_item,
-							  struct comp_buffer, sink_list);
-#if CONFIG_IPC_MAJOR_4
-		if (IPC4_SINK_QUEUE_ID(buf_get_id(source)) ==
-			SOF_AEC_FEEDBACK_QUEUE_ID) {
-#else
-		if (source->source->pipeline->pipeline_id != dev->pipeline->pipeline_id) {
+	struct comp_buffer *b0 = list_first_item(&dev->bsource_list, struct comp_buffer, sink_list);
+	struct comp_buffer *b1 = list_next_item(b0, sink_list);
+
+	cd->aec_reference_source = (is_ref_buffer(dev, b0) ? 0 : 1);
+	cd->raw_microphone_source = (cd->aec_reference_source == 1) ? 0 : 1;
+	cd->ref_comp_buffer = (cd->aec_reference_source == 0) ? b0 : b1;
+
+#ifdef CONFIG_IPC_MAJOR_4
+	/* Workaround: nothing in the framework sets up the stream for
+	 * the reference source correctly from topology input, so we
+	 * have to do it here.  Input pin "1" is just a magic number
+	 * that must match the input_pin_index token in a format
+	 * record from our topology.
+	 */
+	ipc4_update_source_format(sources[cd->aec_reference_source],
+				  &mod->priv.cfg.input_pins[1].audio_fmt);
 #endif
-			cd->aec_reference_source = i;
-			aec_channels = audio_stream_get_channels(&source->stream);
-			comp_dbg(dev, "reference index = %d, channels = %d", i, aec_channels);
-		} else {
-			cd->raw_microphone_source = i;
-			microphone_stream_channels = audio_stream_get_channels(&source->stream);
-			comp_dbg(dev, "microphone index = %d, channels = %d", i,
-				 microphone_stream_channels);
-		}
 
-		i++;
-	}
+	/* Validate channel, format and rate on each of our three inputs */
+	int ref_fmt = source_get_frm_fmt(sources[cd->aec_reference_source]);
+	int ref_chan = source_get_channels(sources[cd->aec_reference_source]);
+	int ref_rate = source_get_rate(sources[cd->aec_reference_source]);
 
-	output = list_first_item(&dev->bsink_list, struct comp_buffer, source_list);
+	int mic_fmt = source_get_frm_fmt(sources[cd->raw_microphone_source]);
+	int mic_chan = source_get_channels(sources[cd->raw_microphone_source]);
+	int mic_rate = source_get_rate(sources[cd->raw_microphone_source]);
 
-	/* On some platform the playback output is left right left right due to a crossover
-	 * later on the signal processing chain. That makes the aec_reference be 4 channels
-	 * and the AEC should only use the 2 first.
-	 */
-	if (cd->num_aec_reference_channels > aec_channels) {
-		comp_err(dev, "unsupported number of AEC reference channels: %d",
-			 aec_channels);
-		return -EINVAL;
+	int out_fmt = sink_get_frm_fmt(sinks[0]);
+	int out_chan = sink_get_channels(sinks[0]);
+	int out_rate = sink_get_rate(sinks[0]);
+
+	cd->ref_framesz = source_get_frame_bytes(sources[cd->aec_reference_source]);
+	cd->cap_framesz = sink_get_frame_bytes(sinks[0]);
+
+	cd->num_aec_reference_channels = MIN(ref_chan, CHAN_MAX);
+	cd->num_capture_channels = MIN(mic_chan, CHAN_MAX);
+
+	/* Too many channels is a soft failure, AEC treats only the first N */
+	if (mic_chan > CHAN_MAX)
+		comp_warn(dev, "Too many mic channels: %d, truncating to %d",
+			  mic_chan, CHAN_MAX);
+	if (ref_chan > CHAN_MAX)
+		comp_warn(dev, "Too many ref channels: %d, truncating to %d",
+			  ref_chan, CHAN_MAX);
+
+	if (out_chan != mic_chan) {
+		comp_err(dev, "Input/output mic channel mismatch");
+		ret = -EINVAL;
 	}
 
-	frame_fmt = audio_stream_get_frm_fmt(&output->stream);
-	rate = audio_stream_get_rate(&output->stream);
-	output_stream_channels = audio_stream_get_channels(&output->stream);
+	if (ref_rate != mic_rate || ref_rate != out_rate ||
+	    ref_rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) {
+		comp_err(dev, "Incorrect source/sink sample rate, expect %d\n",
+			 CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ);
+		ret = -EINVAL;
+	}
 
-	if (cd->num_capture_channels > microphone_stream_channels) {
-		comp_err(dev, "unsupported number of microphone channels: %d",
-			 microphone_stream_channels);
-		return -EINVAL;
+	if (mic_fmt != out_fmt) {
+		comp_err(dev, "Mismatched in/out frame format");
+		ret = -EINVAL;
 	}
 
-	if (cd->num_capture_channels > output_stream_channels) {
-		comp_err(dev, "unsupported number of output channels: %d",
-			 output_stream_channels);
-		return -EINVAL;
+	if ((mic_fmt != SOF_IPC_FRAME_S32_LE && mic_fmt != SOF_IPC_FRAME_S16_LE) ||
+	    (ref_fmt != SOF_IPC_FRAME_S32_LE && ref_fmt != SOF_IPC_FRAME_S16_LE)) {
+		comp_err(dev, "Unsupported sample format");
+		ret = -EINVAL;
 	}
 
-	switch (frame_fmt) {
-#if CONFIG_FORMAT_S16LE
-	case SOF_IPC_FRAME_S16_LE:
-		break;
-#endif /* CONFIG_FORMAT_S16LE */
-	default:
-		comp_err(dev, "unsupported data format: %d", frame_fmt);
-		return -EINVAL;
+#ifdef CONFIG_IPC_MAJOR_4
+	int ref_bufsz = source_get_min_available(sources[cd->aec_reference_source]);
+	int mic_bufsz = source_get_min_available(sources[cd->raw_microphone_source]);
+	int out_bufsz = sink_get_min_free_space(sinks[0]);
+
+	if (mic_bufsz > cd->num_frames * cd->cap_framesz) {
+		comp_err(dev, "Mic IBS %d >1 AEC block, needless delay!", mic_bufsz);
+		ret = -EINVAL;
 	}
 
-	if (rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) {
-		comp_err(dev, "unsupported samplerate: %d", rate);
-		return -EINVAL;
+	if (ref_bufsz > cd->num_frames * cd->ref_framesz) {
+		comp_err(dev, "Ref IBS %d >1 one AEC block, needless delay!", ref_bufsz);
+		ret = -EINVAL;
 	}
 
+	if (out_bufsz < cd->num_frames * cd->cap_framesz) {
+		comp_err(dev, "Capture OBS %d too small, must fit 1 AEC block", out_bufsz);
+		ret = -EINVAL;
+	}
+#endif
+
+	if (ret < 0)
+		return ret;
+
+	cd->mic_copy = mic_fmt == SOF_IPC_FRAME_S16_LE ? source_copy16 : source_copy32;
+	cd->ref_copy = ref_fmt == SOF_IPC_FRAME_S16_LE ? source_copy16 : source_copy32;
+	cd->out_copy = out_fmt == SOF_IPC_FRAME_S16_LE ? sink_copy16 : sink_copy32;
+
+	cd->last_ref_ok = false;
+	cd->active = false;
+
+	ret = GoogleRtcAudioProcessingSetStreamFormats(cd->state, mic_rate,
+						       cd->num_capture_channels,
+						       cd->num_capture_channels,
+						       ref_rate, cd->num_aec_reference_channels);
+
 	/* Blobs sent during COMP_STATE_READY is assigned to blob_handler->data
 	 * directly, so comp_is_new_data_blob_available always returns false.
 	 */
-	ret = google_rtc_audio_processing_reconfigure(mod);
-	if (ret)
-		return ret;
+	if (ret == 0)
+		ret = google_rtc_audio_processing_reconfigure(mod);
 
-	return 0;
+	return ret;
+}
+
+static int trigger_handler(struct processing_module *mod, int cmd)
+{
+	struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
+
+	/* Ignore and halt propagation if we get a trigger from the
+	 * playback pipeline: not for us.
+	 */
+	if (cd->ref_comp_buffer->walking)
+		return PPL_STATUS_PATH_STOP;
+
+	/* Note: not module_adapter_set_state().  With IPC4 those are
+	 * identical, but IPC3 has some odd-looking logic that
+	 * validates that no sources are active when receiving a
+	 * PRE_START command, which obviously breaks for our reference
+	 * stream if playback was already running when our pipeline
+	 * started
+	 */
+	return comp_set_state(mod->dev, cmd);
 }
 
 static int google_rtc_audio_processing_reset(struct processing_module *mod)
 {
 	comp_dbg(mod->dev, "google_rtc_audio_processing_reset()");
-
 	return 0;
 }
 
-static int google_rtc_audio_processing_process(struct processing_module *mod,
-					       struct input_stream_buffer *input_buffers,
-					       int num_input_buffers,
-					       struct output_stream_buffer *output_buffers,
-					       int num_output_buffers)
+static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd)
+{
+	/* Note that reference input and mic output share the same
+	 * buffer for efficiency
+	 */
+	GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
+						      (const float **)cd->refout_buffers);
+	GoogleRtcAudioProcessingProcessCapture_float32(cd->state,
+						       (const float **)cd->raw_mic_buffers,
+						       cd->refout_buffers);
+	cd->buffered_frames = 0;
+}
+
+static bool ref_stream_active(struct google_rtc_audio_processing_comp_data *cd)
+{
+	return cd->ref_comp_buffer->source &&
+		cd->ref_comp_buffer->source->state == COMP_STATE_ACTIVE;
+}
+
+static int mod_process(struct processing_module *mod, struct sof_source **sources,
+		       int num_of_sources, struct sof_sink **sinks, int num_of_sinks)
 {
 	struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
-	int16_t *src, *dst, *ref;
-	uint32_t num_aec_reference_frames;
-	uint32_t num_aec_reference_bytes;
-	int num_samples_remaining;
-	int num_frames_remaining;
-	int channel;
-	int frames;
-	int nmax;
-	int ret;
-	int i, j, n;
 
-	struct input_stream_buffer *ref_streamb, *mic_streamb;
-	struct output_stream_buffer *out_streamb;
-	struct audio_stream *ref_stream, *mic_stream, *out_stream;
+	if (cd->reconfigure)
+		google_rtc_audio_processing_reconfigure(mod);
 
-	if (cd->reconfigure) {
-		ret = google_rtc_audio_processing_reconfigure(mod);
-		if (ret)
-			return ret;
-	}
+	struct sof_source *mic = sources[cd->raw_microphone_source];
+	struct sof_source *ref = sources[cd->aec_reference_source];
+	struct sof_sink *out = sinks[0];
+	bool ref_ok = ref_stream_active(cd);
 
-	ref_streamb = &input_buffers[cd->aec_reference_source];
-	ref_stream = ref_streamb->data;
-	ref = audio_stream_get_rptr(ref_stream);
-
-	num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
-	num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
-
-	num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
-	while (num_samples_remaining) {
-		nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
-		n = MIN(num_samples_remaining, nmax);
-		for (i = 0; i < n; i += cd->num_aec_reference_channels) {
-			j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
-			for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
-				cd->aec_reference_buffer[j++] = ref[channel];
-
-			ref += audio_stream_get_channels(ref_stream);
-			++cd->aec_reference_frame_index;
-
-			if (cd->aec_reference_frame_index == cd->num_frames) {
-				GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
-									    cd->aec_reference_buffer);
-				cd->aec_reference_frame_index = 0;
-			}
-		}
-		num_samples_remaining -= n;
-		ref = audio_stream_wrap(ref_stream, ref);
+	/* Clear the buffer if the reference pipeline shuts off */
+	if (!ref_ok && cd->last_ref_ok)
+		bzero(arch_xtensa_cached_ptr(refoutbuf), sizeof(refoutbuf));
+
+	int fmic = source_get_data_frames_available(mic);
+	int fref = source_get_data_frames_available(ref);
+	int frames = ref_ok ? MIN(fmic, fref) : fmic;
+	int n, frames_rem;
+
+	/* If fref > fmic at the startup of the stream, we should
+	 * consume the early samples so AEC compares the most recent
+	 * values.  It's common to have stale reference data waiting
+	 * in the pipe when the first capture bytes arrive.
+	 */
+	if (!cd->active && fref > fmic) {
+		comp_info(mod->dev, "Startup: prune %d stale ref frames", fref - fmic);
+		source_release_data(ref, (fref - fmic) * cd->ref_framesz);
 	}
-	input_buffers[cd->aec_reference_source].consumed = num_aec_reference_bytes;
 
-	mic_streamb = &input_buffers[cd->raw_microphone_source];
-	mic_stream = mic_streamb->data;
-	out_streamb = &output_buffers[0];
-	out_stream = out_streamb->data;
+	for (frames_rem = frames; frames_rem; frames_rem -= n) {
+		n = MIN(frames_rem, cd->num_frames - cd->buffered_frames);
 
-	src = audio_stream_get_rptr(mic_stream);
-	dst = audio_stream_get_wptr(out_stream);
+		cd->mic_copy(mic, n, cd->raw_mic_buffers, cd->buffered_frames);
 
-	frames = input_buffers[cd->raw_microphone_source].size;
-	num_frames_remaining = frames;
+		if (ref_ok)
+			cd->ref_copy(ref, n, cd->refout_buffers, cd->buffered_frames);
 
-	while (num_frames_remaining) {
-		nmax = audio_stream_frames_without_wrap(mic_stream, src);
-		n = MIN(num_frames_remaining, nmax);
-		nmax = audio_stream_frames_without_wrap(out_stream, dst);
-		n = MIN(n, nmax);
-		for (i = 0; i < n; i++) {
-			memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
-						      cd->num_capture_channels]),
-				 cd->num_frames * cd->num_capture_channels *
-				 sizeof(cd->raw_mic_buffer[0]), src,
-				 sizeof(int16_t) * cd->num_capture_channels);
-			++cd->raw_mic_buffer_frame_index;
-
-			memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
-				 sizeof(cd->output_buffer[0]),
-				 &(cd->output_buffer[cd->output_buffer_frame_index *
-						     cd->num_capture_channels]),
-				 sizeof(int16_t) * cd->num_capture_channels);
-			++cd->output_buffer_frame_index;
-
-			if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
-				GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
-									     cd->raw_mic_buffer,
-									     cd->output_buffer);
-				cd->output_buffer_frame_index = 0;
-				cd->raw_mic_buffer_frame_index = 0;
+		cd->buffered_frames += n;
+
+		if (cd->buffered_frames >= cd->num_frames) {
+			/* Safety valve; OBS only guarantees us space for one block */
+			if (sink_get_free_size(out) < cd->num_frames * cd->cap_framesz) {
+				comp_warn(mod->dev, "AEC sink backed up!");
+				break;
 			}
 
-			src += audio_stream_get_channels(mic_stream);
-			dst += audio_stream_get_channels(out_stream);
+			execute_aec(cd);
+			cd->out_copy(out, cd->num_frames, cd->refout_buffers);
 		}
-		num_frames_remaining -= n;
-		src = audio_stream_wrap(mic_stream, src);
-		dst = audio_stream_wrap(out_stream, dst);
 	}
-
-	module_update_buffer_position(&input_buffers[cd->raw_microphone_source],
-				      &output_buffers[0], frames);
-
+	cd->last_ref_ok = ref_ok;
+	cd->active = true;
 	return 0;
 }
 
 static struct module_interface google_rtc_audio_processing_interface = {
 	.init  = google_rtc_audio_processing_init,
 	.free = google_rtc_audio_processing_free,
-	.process_audio_stream = google_rtc_audio_processing_process,
+	.process = mod_process,
 	.prepare = google_rtc_audio_processing_prepare,
 	.set_configuration = google_rtc_audio_processing_set_config,
 	.get_configuration = google_rtc_audio_processing_get_config,
+	.trigger = trigger_handler,
 	.reset = google_rtc_audio_processing_reset,
 };
 
diff --git a/src/audio/google/google_rtc_audio_processing_mock.c b/src/audio/google/google_rtc_audio_processing_mock.c
index a6c55c641270..681d003d3592 100644
--- a/src/audio/google/google_rtc_audio_processing_mock.c
+++ b/src/audio/google/google_rtc_audio_processing_mock.c
@@ -10,8 +10,6 @@
 #include <string.h>
 #include <stdint.h>
 
-#include <sof/audio/format.h>
-#include <sof/math/numbers.h>
 #include <rtos/alloc.h>
 #include "ipc/topology.h"
 
@@ -23,7 +21,7 @@ struct GoogleRtcAudioProcessingState {
 	int num_aec_reference_channels;
 	int num_output_channels;
 	int num_frames;
-	int16_t *aec_reference;
+	float *aec_reference;
 };
 
 static void SetFormats(GoogleRtcAudioProcessingState *const state,
@@ -140,46 +138,40 @@ int GoogleRtcAudioProcessingReconfigure(GoogleRtcAudioProcessingState *const sta
 	return 0;
 }
 
-int GoogleRtcAudioProcessingProcessCapture_int16(GoogleRtcAudioProcessingState *const state,
-						 const int16_t *const src,
-						 int16_t *const dest)
+int GoogleRtcAudioProcessingProcessCapture_float32(GoogleRtcAudioProcessingState * const state,
+						   const float * const *src,
+						   float * const *dest)
 {
-	int16_t *ref = state->aec_reference;
-	int16_t *mic = (int16_t *) src;
-	int16_t *out = dest;
-	int n, io, im, ir;
-
-	/* Mix input and reference channels to output. The matching channels numbers
-	 * are mixed. If e.g. microphone and output channels count is 4, and reference
-	 * has 2 channels, output channels 3 and 4 are copy of microphone channels 3 and 4,
-	 * and output channels 1 and 2 are sum of microphone and reference.
-	 */
-	memset(dest, 0, sizeof(int16_t) * state->num_output_channels * state->num_frames);
-	for (n = 0; n < state->num_frames; ++n) {
-		im = 0;
-		ir = 0;
-		for (io = 0; io < state->num_output_channels; io++) {
-			out[io] = sat_int16(
-				(im < state->num_capture_channels ? (int32_t)mic[im++] : 0) +
-				(ir < state->num_aec_reference_channels ? (int32_t)ref[ir++] : 0));
+	float *ref = state->aec_reference;
+	float **mic = (float **)src;
+	int n, chan;
+
+	for (chan = 0; chan < state->num_output_channels; chan++) {
+		for (n = 0; n < state->num_frames; ++n) {
+			float mic_save = mic[chan][n];	/* allow same in/out buffer */
+
+			if (chan < state->num_aec_reference_channels)
+				dest[chan][n] = mic_save + ref[n + (chan * state->num_frames)];
+			else
+				dest[chan][n] = mic_save;
 		}
-
-		ref += state->num_aec_reference_channels;
-		out += state->num_output_channels;
-		mic += state->num_capture_channels;
 	}
 	return 0;
 }
 
-int GoogleRtcAudioProcessingAnalyzeRender_int16(GoogleRtcAudioProcessingState *const state,
-						const int16_t *const data)
+int GoogleRtcAudioProcessingAnalyzeRender_float32(GoogleRtcAudioProcessingState * const state,
+						  const float * const *data)
 {
 	const size_t buffer_size =
 		sizeof(state->aec_reference[0])
-		* state->num_frames
-		* state->num_aec_reference_channels;
-	memcpy_s(state->aec_reference, buffer_size,
-		 data, buffer_size);
+		* state->num_frames;
+	int channel;
+
+	for (channel = 0; channel < state->num_aec_reference_channels; channel++) {
+		memcpy_s(&state->aec_reference[channel * state->num_frames], buffer_size,
+			 data[channel], buffer_size);
+	}
+
 	return 0;
 }
 
diff --git a/src/include/ipc4/aec.h b/src/include/ipc4/aec.h
index 71b0afbbfe88..9456f86bcdf8 100644
--- a/src/include/ipc4/aec.h
+++ b/src/include/ipc4/aec.h
@@ -38,6 +38,4 @@ enum sof_ipc4_aec_config_params {
 	IPC4_AEC_SET_EXT_FMT,
 };
 
-#define SOF_AEC_FEEDBACK_QUEUE_ID 1
-
 #endif