Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio: Mixin_mixout: Add HiFi5 implementation. #8793

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/audio/mixin_mixout/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
add_local_sources(sof mixin_mixout.c mixin_mixout_generic.c mixin_mixout_hifi3.c)
add_local_sources(sof mixin_mixout.c mixin_mixout_generic.c mixin_mixout_hifi3.c mixin_mixout_hifi5.c)
37 changes: 37 additions & 0 deletions src/audio/mixin_mixout/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,40 @@ config COMP_MIXIN_MIXOUT
default y
help
Select for Mixin_mixout component

choice "MIXIN_MIXOUT_SIMD_LEVEL_SELECT"
prompt "choose which SIMD level used for MIXIN_MIXOUT module"
depends on COMP_MIXIN_MIXOUT
default MIXIN_MIXOUT_HIFI_MAX

config MIXIN_MIXOUT_HIFI_MAX
prompt "Max HiFi level available in the toolchain"
bool
help
When this is selected, the optimization level is determined
by the toolchain.

config MIXIN_MIXOUT_HIFI_5
prompt "choose HIFI5 intrinsic optimized MIXIN_MIXOUT module"
bool
help
This option is used to build HIFI5 optimized MIXIN_MIXOUT code

config MIXIN_MIXOUT_HIFI_4
prompt "choose HIFI4 intrinsic optimized MIXIN_MIXOUT module"
bool
help
This option used to build HIFI4 optimized MIXIN_MIXOUT code

config MIXIN_MIXOUT_HIFI_3
prompt "choose HIFI3 intrinsic optimized MIXIN_MIXOUT module"
bool
help
This option used to build HIFI3 intrinsic optimized MIXIN_MIXOUT code

config MIXIN_MIXOUT_HIFI_NONE
prompt "choose generic C MIXIN_MIXOUT module, no HIFI SIMD involved"
bool
help
This option used to build MIXIN_MIXOUT generic code.
endchoice
12 changes: 0 additions & 12 deletions src/audio/mixin_mixout/mixin_mixout.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,6 @@
#include <sof/platform.h>
#include <stddef.h>

#define MIXIN_MIXOUT_GENERIC

#if defined(__XCC__)

#include <xtensa/config/core-isa.h>
#if XCHAL_HAVE_HIFI3 || XCHAL_HAVE_HIFI4
#undef MIXIN_MIXOUT_GENERIC
#define MIXIN_MIXOUT_HIFI3
#endif

#endif

enum ipc4_mixin_config_param {
/* large_config_set param id for ipc4_mixer_mode_config */
IPC4_MIXER_MODE = 1
Expand Down
2 changes: 1 addition & 1 deletion src/audio/mixin_mixout/mixin_mixout_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "mixin_mixout.h"

#ifdef MIXIN_MIXOUT_GENERIC
#if SOF_USE_HIFI(NONE, MIXIN_MIXOUT)

#if CONFIG_FORMAT_S16LE
static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
Expand Down
2 changes: 1 addition & 1 deletion src/audio/mixin_mixout/mixin_mixout_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include "mixin_mixout.h"

#ifdef MIXIN_MIXOUT_HIFI3
#if SOF_USE_HIFI(3, MIXIN_MIXOUT) || SOF_USE_HIFI(4, MIXIN_MIXOUT)

#if CONFIG_FORMAT_S16LE
static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
Expand Down
285 changes: 285 additions & 0 deletions src/audio/mixin_mixout/mixin_mixout_hifi5.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2024 Intel Corporation. All rights reserved.
//
// Author: Andrula Song <[email protected]>

#include <sof/common.h>

#include "mixin_mixout.h"

#if SOF_USE_HIFI(5, MIXIN_MIXOUT)

#if CONFIG_FORMAT_S16LE
/*
 * HiFi5 mixer for 16-bit samples.
 *
 * The first min(mixed_samples - start_sample, sample_count) samples are added
 * (AE_ADD16S) to what is already stored in the sink; the rest of sample_count
 * is copied from the source to the sink unchanged. Both buffers are circular,
 * so the work is split into chunks that end at whichever buffer boundary comes
 * first.
 *
 * sink:          destination circular buffer; writing starts start_sample
 *                samples after sink->ptr
 * start_sample:  offset, in samples, of the first sink sample to touch
 * mixed_samples: number of sink samples (counted from sink->ptr) that already
 *                hold data; must be >= start_sample
 * source:        source circular buffer, read from source->ptr
 * sample_count:  total number of samples to mix plus copy
 * gain:          not used by this implementation
 */
static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
		    const struct cir_buf_ptr *source,
		    int32_t sample_count, uint16_t gain)
{
	ae_valignx2 src_align = AE_ZALIGN128();
	ae_valignx2 dst_load_align = AE_ZALIGN128();
	ae_valignx2 dst_store_align = AE_ZALIGN128();
	ae_int16x4 src_lo, src_hi;
	ae_int16x4 acc_lo, acc_hi;
	ae_int16x8 *rd;
	ae_int16x8 *wr;
	/* neither pointer is wrapped yet; cir_buf_wrap() runs at the top of each chunk */
	ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample;
	ae_int16 *src = source->ptr;
	int mix_total, copy_total, remaining;
	int chunk = 0;
	int room, vec_iters, tail, k;

	assert(mixed_samples >= start_sample);
	mix_total = AE_MIN32(mixed_samples - start_sample, sample_count);
	copy_total = sample_count - mix_total;

	/* Phase 1: add source samples to the data already present in the sink */
	remaining = mix_total;
	while (remaining > 0) {
		/* step past the previous chunk and wrap both pointers */
		src = cir_buf_wrap(src + chunk, source->buf_start, source->buf_end);
		dst = cir_buf_wrap(dst + chunk, sink->buf_start, sink->buf_end);

		/* chunk = samples available before either buffer wraps */
		room = (ae_int16 *)source->buf_end - src;
		chunk = AE_MIN32(remaining, room);
		room = (ae_int16 *)sink->buf_end - dst;
		chunk = AE_MIN32(chunk, room);

		rd = (ae_int16x8 *)src;
		wr = (ae_int16x8 *)dst;
		src_align = AE_LA128_PP(rd);
		dst_load_align = AE_LA128_PP(wr);
		vec_iters = chunk >> 3;
		tail = chunk & 0x07;

		/* eight samples per iteration */
		for (k = vec_iters; k > 0; k--) {
			AE_LA16X4X2_IP(src_lo, src_hi, src_align, rd);
			AE_LA16X4X2_IP(acc_lo, acc_hi, dst_load_align, wr);
			/* rewind so the store lands on the samples just loaded */
			wr--;
			acc_lo = AE_ADD16S(src_lo, acc_lo);
			acc_hi = AE_ADD16S(src_hi, acc_hi);
			AE_SA16X4X2_IP(acc_lo, acc_hi, dst_store_align, wr);
		}
		AE_SA128POS_FP(dst_store_align, wr);

		/* fewer than eight samples remain: go one by one so that no
		 * access reaches beyond the end of the chunk
		 */
		for (k = tail; k > 0; k--) {
			AE_L16_IP(src_lo, (ae_int16 *)rd, sizeof(ae_int16));
			AE_L16_IP(acc_lo, (ae_int16 *)wr, 0);
			acc_lo = AE_ADD16S(src_lo, acc_lo);
			AE_S16_0_IP(acc_lo, (ae_int16 *)wr, sizeof(ae_int16));
		}

		remaining -= chunk;
	}

	/* Phase 2: plain copy; chunk still holds the size of the last mixed chunk */
	remaining = copy_total;
	while (remaining > 0) {
		src = cir_buf_wrap(src + chunk, source->buf_start, source->buf_end);
		dst = cir_buf_wrap(dst + chunk, sink->buf_start, sink->buf_end);

		/* chunk = samples available before either buffer wraps */
		room = (ae_int16 *)source->buf_end - src;
		chunk = AE_MIN32(remaining, room);
		room = (ae_int16 *)sink->buf_end - dst;
		chunk = AE_MIN32(chunk, room);

		rd = (ae_int16x8 *)src;
		wr = (ae_int16x8 *)dst;
		src_align = AE_LA128_PP(rd);
		vec_iters = chunk >> 3;
		tail = chunk & 0x07;

		/* eight samples per iteration */
		for (k = vec_iters; k > 0; k--) {
			AE_LA16X4X2_IP(src_lo, src_hi, src_align, rd);
			AE_SA16X4X2_IP(src_lo, src_hi, dst_store_align, wr);
		}
		AE_SA128POS_FP(dst_store_align, wr);

		/* fewer than eight samples remain: go one by one so that no
		 * access reaches beyond the end of the chunk
		 */
		for (k = tail; k > 0; k--) {
			AE_L16_IP(src_lo, (ae_int16 *)rd, sizeof(ae_int16));
			AE_S16_0_IP(src_lo, (ae_int16 *)wr, sizeof(ae_int16));
		}

		remaining -= chunk;
	}
}
#endif /* CONFIG_FORMAT_S16LE */

#if CONFIG_FORMAT_S24LE
/*
 * HiFi5 mixer for 24-bit samples held in 32-bit containers.
 *
 * The first min(mixed_samples - start_sample, sample_count) samples are added
 * (AE_ADD24S) to what is already stored in the sink; the rest of sample_count
 * is copied from the source to the sink unchanged. Both buffers are circular,
 * so the work is split into chunks that end at whichever buffer boundary comes
 * first.
 *
 * sink:          destination circular buffer; writing starts start_sample
 *                samples after sink->ptr
 * start_sample:  offset, in samples, of the first sink sample to touch
 * mixed_samples: number of sink samples (counted from sink->ptr) that already
 *                hold data; must be >= start_sample
 * source:        source circular buffer, read from source->ptr
 * sample_count:  total number of samples to mix plus copy
 * gain:          not used by this implementation
 *
 * NOTE(review): reviewers of this change asked for confirmation that
 * AE_ADD24S matches the S24_4LE layout in use (ALSA S24_LE keeps the sample in
 * the least significant bits while Xtensa ae_f24 is most-significant-bit
 * aligned), asked whether the generic and HiFi3 mixers behave the same way,
 * suggested rejecting an unsupported 24-bit flavour in prepare(), and proposed
 * checking that CI really exercises this function (e.g. by breaking it on
 * purpose in a test patch). Those points are open - confirm before relying on
 * this path.
 */
static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
		    const struct cir_buf_ptr *source,
		    int32_t sample_count, uint16_t gain)
{
	int samples_to_mix, samples_to_copy, left_samples;
	int n, nmax, i, m, left;
	ae_int32x2 in_sample, in_sample1;
	ae_int32x2 out_sample, out_sample1;
	ae_int32x4 *in;
	ae_int32x4 *out;
	ae_valignx2 inu = AE_ZALIGN128();
	ae_valignx2 outu1 = AE_ZALIGN128();
	ae_valignx2 outu2 = AE_ZALIGN128();
	/* cir_buf_wrap() is required and is done below in a loop */
	int32_t *dst = (int32_t *)sink->ptr + start_sample;
	int32_t *src = source->ptr;

	assert(mixed_samples >= start_sample);
	samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count);
	samples_to_copy = sample_count - samples_to_mix;
	n = 0;

	/* Phase 1: add source samples to the data already present in the sink */
	for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) {
		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
		/* n = samples available before either buffer wraps */
		nmax = (int32_t *)source->buf_end - src;
		n = AE_MIN32(left_samples, nmax);
		nmax = (int32_t *)sink->buf_end - dst;
		n = AE_MIN32(n, nmax);
		in = (ae_int32x4 *)src;
		out = (ae_int32x4 *)dst;
		inu = AE_LA128_PP(in);
		outu1 = AE_LA128_PP(out);
		m = n >> 2;
		left = n & 3;
		/* process 4 samples per iteration */
		for (i = 0; i < m; i++) {
			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
			AE_LA32X2X2_IP(out_sample, out_sample1, outu1, out);
			/* rewind so the store lands on the samples just loaded */
			out--;
			out_sample = AE_ADD24S(in_sample, out_sample);
			out_sample1 = AE_ADD24S(in_sample1, out_sample1);
			AE_SA32X2X2_IP(out_sample, out_sample1, outu2, out);
		}
		AE_SA128POS_FP(outu2, out);

		/* Up to 3 samples can remain (n & 3). Handle every one of them
		 * individually so that no access reaches beyond the chunk; a
		 * single "if (left)" would silently drop up to two samples while
		 * the outer loop still advances by n.
		 */
		for (i = 0; i < left; i++) {
			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
			AE_L32_IP(out_sample, (ae_int32 *)out, 0);
			out_sample = AE_ADD24S(in_sample, out_sample);
			AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
		}
	}

	/* Phase 2: plain copy; n still holds the size of the last mixed chunk */
	for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) {
		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
		/* n = samples available before either buffer wraps */
		nmax = (int32_t *)source->buf_end - src;
		n = AE_MIN32(left_samples, nmax);
		nmax = (int32_t *)sink->buf_end - dst;
		n = AE_MIN32(n, nmax);
		in = (ae_int32x4 *)src;
		out = (ae_int32x4 *)dst;
		inu = AE_LA128_PP(in);
		m = n >> 2;
		left = n & 3;
		/* copy 4 samples per iteration */
		for (i = 0; i < m; i++) {
			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
			AE_SA32X2X2_IP(in_sample, in_sample1, outu2, out);
		}
		AE_SA128POS_FP(outu2, out);

		/* copy each of the up to 3 remaining samples individually to
		 * avoid memory access overrun
		 */
		for (i = 0; i < left; i++) {
			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
			AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
		}
	}
}

#endif /* CONFIG_FORMAT_S24LE */

#if CONFIG_FORMAT_S32LE
/*
 * HiFi5 mixer for 32-bit samples.
 *
 * The first min(mixed_samples - start_sample, sample_count) samples are added
 * (AE_ADD32S) to what is already stored in the sink; the rest of sample_count
 * is copied from the source to the sink unchanged. Both buffers are circular,
 * so the work is split into chunks that end at whichever buffer boundary comes
 * first.
 *
 * sink:          destination circular buffer; writing starts start_sample
 *                samples after sink->ptr
 * start_sample:  offset, in samples, of the first sink sample to touch
 * mixed_samples: number of sink samples (counted from sink->ptr) that already
 *                hold data; must be >= start_sample
 * source:        source circular buffer, read from source->ptr
 * sample_count:  total number of samples to mix plus copy
 * gain:          not used by this implementation
 */
static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
		    const struct cir_buf_ptr *source,
		    int32_t sample_count, uint16_t gain)
{
	int samples_to_mix, samples_to_copy, left_samples;
	int n, nmax, i, m, left;
	ae_int32x2 in_sample, in_sample1;
	ae_int32x2 out_sample, out_sample1;
	ae_int32x4 *in;
	ae_int32x4 *out;
	ae_valignx2 inu = AE_ZALIGN128();
	ae_valignx2 outu1 = AE_ZALIGN128();
	ae_valignx2 outu2 = AE_ZALIGN128();
	/* cir_buf_wrap() is required and is done below in a loop */
	int32_t *dst = (int32_t *)sink->ptr + start_sample;
	int32_t *src = source->ptr;

	assert(mixed_samples >= start_sample);
	samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count);
	samples_to_copy = sample_count - samples_to_mix;
	n = 0;

	/* Phase 1: add source samples to the data already present in the sink */
	for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) {
		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
		/* n = samples available before either buffer wraps */
		nmax = (int32_t *)source->buf_end - src;
		n = AE_MIN32(left_samples, nmax);
		nmax = (int32_t *)sink->buf_end - dst;
		n = AE_MIN32(n, nmax);
		in = (ae_int32x4 *)src;
		out = (ae_int32x4 *)dst;
		inu = AE_LA128_PP(in);
		outu1 = AE_LA128_PP(out);
		m = n >> 2;
		left = n & 3;
		/* process 4 samples per iteration */
		for (i = 0; i < m; i++) {
			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
			AE_LA32X2X2_IP(out_sample, out_sample1, outu1, out);
			/* rewind so the store lands on the samples just loaded */
			out--;
			out_sample = AE_ADD32S(in_sample, out_sample);
			out_sample1 = AE_ADD32S(in_sample1, out_sample1);
			AE_SA32X2X2_IP(out_sample, out_sample1, outu2, out);
		}
		AE_SA128POS_FP(outu2, out);

		/* Up to 3 samples can remain (n & 3). Handle every one of them
		 * individually so that no access reaches beyond the chunk; a
		 * single "if (left)" would silently drop up to two samples while
		 * the outer loop still advances by n.
		 */
		for (i = 0; i < left; i++) {
			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
			AE_L32_IP(out_sample, (ae_int32 *)out, 0);
			out_sample = AE_ADD32S(in_sample, out_sample);
			AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
		}
	}

	/* Phase 2: plain copy; n still holds the size of the last mixed chunk */
	for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) {
		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
		/* n = samples available before either buffer wraps */
		nmax = (int32_t *)source->buf_end - src;
		n = AE_MIN32(left_samples, nmax);
		nmax = (int32_t *)sink->buf_end - dst;
		n = AE_MIN32(n, nmax);
		in = (ae_int32x4 *)src;
		out = (ae_int32x4 *)dst;
		inu = AE_LA128_PP(in);
		m = n >> 2;
		left = n & 3;
		/* copy 4 samples per iteration */
		for (i = 0; i < m; i++) {
			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
			AE_SA32X2X2_IP(in_sample, in_sample1, outu2, out);
		}
		AE_SA128POS_FP(outu2, out);

		/* copy each of the up to 3 remaining samples individually to
		 * avoid memory access overrun
		 */
		for (i = 0; i < left; i++) {
			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
			AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
		}
	}
}

#endif /* CONFIG_FORMAT_S32LE */

/*
 * Table pairing each frame format with the HiFi5 mix routine defined in this
 * file. An entry is present only when the matching CONFIG_FORMAT_* option is
 * enabled, so the table can be shorter than three entries.
 */
const struct mix_func_map mix_func_map[] = {
#if CONFIG_FORMAT_S16LE
{ SOF_IPC_FRAME_S16_LE, mix_s16 },
#endif
#if CONFIG_FORMAT_S24LE
{ SOF_IPC_FRAME_S24_4LE, mix_s24 },
#endif
#if CONFIG_FORMAT_S32LE
{ SOF_IPC_FRAME_S32_LE, mix_s32 }
#endif
};

/* Number of entries actually compiled into mix_func_map */
const size_t mix_count = ARRAY_SIZE(mix_func_map);

#endif
1 change: 1 addition & 0 deletions zephyr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ zephyr_library_sources_ifdef(CONFIG_COMP_MIXIN_MIXOUT
${SOF_AUDIO_PATH}/mixin_mixout/mixin_mixout.c
${SOF_AUDIO_PATH}/mixin_mixout/mixin_mixout_generic.c
${SOF_AUDIO_PATH}/mixin_mixout/mixin_mixout_hifi3.c
${SOF_AUDIO_PATH}/mixin_mixout/mixin_mixout_hifi5.c
)

zephyr_library_sources_ifdef(CONFIG_COMP_TONE
Expand Down
Loading