From 394c08d4f7db0cace3f7941d3fbdddc10c05243b Mon Sep 17 00:00:00 2001 From: Andrula Song Date: Wed, 7 Feb 2024 16:35:19 +0800 Subject: [PATCH] Audio: Volume: Add HiFi5 implementation. Add HiFi5 implementation of volume functions, compared with HiFi3 version, can reduce about 28% cycles. Signed-off-by: Andrula Song Signed-off-by: Seppo Ingalsuo --- src/audio/volume/CMakeLists.txt | 2 + src/audio/volume/Kconfig.simd | 6 + src/audio/volume/volume.c | 10 +- src/audio/volume/volume_hifi4.c | 2 +- src/audio/volume/volume_hifi4_with_peakvol.c | 2 +- src/audio/volume/volume_hifi5.c | 516 +++++++++++++++ src/audio/volume/volume_hifi5_with_peakvol.c | 626 +++++++++++++++++++ src/include/sof/common.h | 8 +- zephyr/CMakeLists.txt | 2 + 9 files changed, 1167 insertions(+), 7 deletions(-) create mode 100644 src/audio/volume/volume_hifi5.c create mode 100644 src/audio/volume/volume_hifi5_with_peakvol.c diff --git a/src/audio/volume/CMakeLists.txt b/src/audio/volume/CMakeLists.txt index 790bd37e4d6a..5c5b764b5912 100644 --- a/src/audio/volume/CMakeLists.txt +++ b/src/audio/volume/CMakeLists.txt @@ -5,9 +5,11 @@ if(CONFIG_COMP_VOLUME) volume_generic.c volume_hifi3.c volume_hifi4.c + volume_hifi5.c volume_generic_with_peakvol.c volume_hifi3_with_peakvol.c volume_hifi4_with_peakvol.c + volume_hifi5_with_peakvol.c volume.c) if(CONFIG_IPC_MAJOR_3) add_local_sources(sof volume_ipc3.c) diff --git a/src/audio/volume/Kconfig.simd b/src/audio/volume/Kconfig.simd index bdddb5d68e8b..88dd09cfb57a 100644 --- a/src/audio/volume/Kconfig.simd +++ b/src/audio/volume/Kconfig.simd @@ -14,6 +14,12 @@ choice "VOLUME_SIMD_LEVEL_SELECT" When this was selected, optimization level will be determined by toolchain pre-defined macros in core isa header file. + config VOLUME_HIFI_5 + prompt "choose HIFI5 intrinsic optimized volume module" + bool + help + This option used to build HIFI5 optimized volume code + config VOLUME_HIFI_4 prompt "choose HIFI4 intrinsic optimized volume module" bool diff --git a/src/audio/volume/volume.c b/src/audio/volume/volume.c index ed0bbc99dec1..a49a06b59586 100644 --- a/src/audio/volume/volume.c +++ b/src/audio/volume/volume.c @@ -646,19 +646,21 @@ static vol_zc_func vol_get_zc_function(struct comp_dev *dev, static void volume_set_alignment(struct audio_stream *source, struct audio_stream *sink) { -#if SOF_USE_HIFI(3, VOLUME) || SOF_USE_HIFI(4, VOLUME) || SOF_USE_HIFI(5, VOLUME) - /* Both source and sink buffer in HiFi 3 or HiFi4 processing version, + /* Both source and sink buffer in HiFi5 processing version, + * xtensa intrinsics ask for 16-byte aligned. + * + * Both source and sink buffer in HiFi 3 or HiFi4 processing version, * xtensa intrinsics ask for 8-byte aligned. 5.1 format SSE audio * requires 16-byte aligned. */ - const uint32_t byte_align = audio_stream_get_channels(source) == 6 ? 16 : 8; + const uint32_t byte_align = audio_stream_get_channels(source) == 6 ? + SOF_FRAME_BYTE_ALIGN_6CH : SOF_FRAME_BYTE_ALIGN; /*There is no limit for frame number, so both source and sink set it to be 1*/ const uint32_t frame_align_req = 1; audio_stream_set_align(byte_align, frame_align_req, source); audio_stream_set_align(byte_align, frame_align_req, sink); -#endif } /** diff --git a/src/audio/volume/volume_hifi4.c b/src/audio/volume/volume_hifi4.c index 894078a4f68a..96259297b9c3 100644 --- a/src/audio/volume/volume_hifi4.c +++ b/src/audio/volume/volume_hifi4.c @@ -21,7 +21,7 @@ LOG_MODULE_DECLARE(volume_hifi4, CONFIG_SOF_LOG_LEVEL); #include "volume.h" -#if SOF_USE_HIFI(4, VOLUME) || SOF_USE_HIFI(5, VOLUME) +#if SOF_USE_HIFI(4, VOLUME) #if (!CONFIG_COMP_PEAK_VOL) diff --git a/src/audio/volume/volume_hifi4_with_peakvol.c b/src/audio/volume/volume_hifi4_with_peakvol.c index d30f64ce47a3..cc85f6076b12 100644 --- a/src/audio/volume/volume_hifi4_with_peakvol.c +++ b/src/audio/volume/volume_hifi4_with_peakvol.c @@ -21,7 +21,7 @@ LOG_MODULE_DECLARE(volume_hifi4, CONFIG_SOF_LOG_LEVEL); #include "volume.h" -#if SOF_USE_HIFI(4, VOLUME) || SOF_USE_HIFI(5, VOLUME) +#if SOF_USE_HIFI(4, VOLUME) #if CONFIG_COMP_PEAK_VOL #include diff --git a/src/audio/volume/volume_hifi5.c b/src/audio/volume/volume_hifi5.c new file mode 100644 index 000000000000..ec17194ff46f --- /dev/null +++ b/src/audio/volume/volume_hifi5.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2024 Intel Corporation. All rights reserved. +// +// Author: Andrula Song + +/** + * \file + * \brief Volume HiFi5 processing implementation without peak volume detection + * \authors Andrula Song + */ + +#include +#include +#include +#include +#include +#include + +LOG_MODULE_DECLARE(volume, CONFIG_SOF_LOG_LEVEL); + +#include "volume.h" + +#if SOF_USE_HIFI(5, VOLUME) + +#if (!CONFIG_COMP_PEAK_VOL) + +#include + +/** + * \brief store volume gain 4 times for xtensa multi-way intrinsic operations. + * Simultaneous processing 4 samples. + * \param[in,out] cd Volume component private data. + * \param[in] channels_count Number of channels to process. + */ +static void vol_store_gain(struct vol_data *cd, const int channels_count) +{ + int32_t i; + + /* using for loop instead of memcpy_s(), because for loop costs less cycles */ + for (i = 0; i < channels_count; i++) { + cd->vol[i] = cd->volume[i]; + cd->vol[i + channels_count * 1] = cd->volume[i]; + cd->vol[i + channels_count * 2] = cd->volume[i]; + cd->vol[i + channels_count * 3] = cd->volume[i]; + } + cd->copy_gain = false; +} + +#if CONFIG_FORMAT_S24LE +/** + * \brief HiFi5 enabled volume processing from 24/32 bit to 24/32 or 32 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused) + */ +static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + ae_int32x2 out_sample, out_sample1; + ae_int32x2 volume, volume1; + ae_int32x4 *buf; + ae_int32x4 *buf_end; + int i, n, m; + ae_int32x4 *vol; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + + /** to ensure the adsress is 16-byte aligned and avoid risk of + * error loading of volume gain while the cd->vol would be set + * as circular buffer + */ + if (cd->copy_gain) + vol_store_gain(cd, channels_count); + + buf = (ae_int32x4 *)cd->vol; + buf_end = (ae_int32x4 *)(cd->vol + channels_count * 4); + vol = buf; + /* Set buf who stores the volume gain data as circular buffer */ + AE_SETCBEGIN0(buf); + AE_SETCEND0(buf_end); + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s24(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s24(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples once */ + for (i = 0; i < n; i += 4) { + /* Load the volume value */ + AE_L32X2X2_XC(volume, volume1, vol, inc); + + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + + /* Multiply the input sample */ +#if COMP_VOLUME_Q8_16 + AE_MULF2P32X4RS(out_sample, out_sample1, + AE_SLAI32S(volume, 7), AE_SLAI32S(volume1, 7), + AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); +#elif COMP_VOLUME_Q1_23 + AE_MULF2P32X4RS(out_sample, out_sample1, volume, volume1, + AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" +#endif + + /* Shift for S24_LE */ + out_sample = AE_SLAI32S(out_sample, 8); + out_sample = AE_SRAI32(out_sample, 8); + out_sample1 = AE_SLAI32S(out_sample1, 8); + out_sample1 = AE_SRAI32(out_sample1, 8); + + /* Store the output sample */ + AE_SA32X2X2_IP(out_sample, out_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } +} + +/** + * \brief HiFi5 enabled volume passthrough from 24/32 bit to 24/32 or 32 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused) + */ +static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, + struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + int i, n, m; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + int samples = audio_stream_get_channels(sink) * frames; + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s24(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s24(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process 4 continuous samples once */ + for (i = 0; i < n; i += 4) { + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + /* Store the output sample */ + AE_SA32X2X2_IP(in_sample, in_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } +} + +#endif /* CONFIG_FORMAT_S24LE */ + +#if CONFIG_FORMAT_S32LE +/** + * \brief HiFi5 enabled volume processing from 32 bit to 24/32 or 32 bit. + * \param[in,out] mod Pointer to struct processing_module + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused) + */ +static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + ae_int32x2 out_sample, out_sample1; + ae_int32x2 volume, volume1; + int i, n, m; + ae_int64 mult0; + ae_int64 mult1; + ae_int32x4 *buf; + ae_int32x4 *buf_end; + ae_int32x4 *vol; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + + /** to ensure the address is 16-byte aligned and avoid risk of + * error loading of volume gain while the cd->vol would be set + * as circular buffer + */ + if (cd->copy_gain) + vol_store_gain(cd, channels_count); + + buf = (ae_int32x4 *)cd->vol; + buf_end = (ae_int32x4 *)(cd->vol + channels_count * 4); + vol = buf; + /* Set buf who stores the volume gain data as circular buffer */ + AE_SETCBEGIN0(buf); + AE_SETCEND0(buf_end); + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s32(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s32(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples once */ + for (i = 0; i < n; i += 4) { + /* Load the volume value */ + AE_L32X2X2_XC(volume, volume1, vol, inc); + + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + +#if COMP_VOLUME_Q8_16 + /* Q8.16 x Q1.31 << 1 -> Q9.48 */ + mult0 = AE_MULF32S_HH(volume, in_sample); + mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ + mult1 = AE_MULF32S_LL(volume, in_sample); + mult1 = AE_SRAI64(mult1, 1); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ + + mult0 = AE_MULF32S_HH(volume1, in_sample1); + mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ + mult1 = AE_MULF32S_LL(volume1, in_sample1); + mult1 = AE_SRAI64(mult1, 1); + out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ +#elif COMP_VOLUME_Q1_23 + /* Q1.23 x Q1.31 << 1 -> Q2.55 */ + mult0 = AE_MULF32S_HH(volume, in_sample); + mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ + mult1 = AE_MULF32S_LL(volume, in_sample); + mult1 = AE_SRAI64(mult1, 8); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ + + mult0 = AE_MULF32S_HH(volume1, in_sample1); + mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ + mult1 = AE_MULF32S_LL(volume1, in_sample1); + mult1 = AE_SRAI64(mult1, 8); + out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" +#endif + AE_SA32X2X2_IP(out_sample, out_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } +} + +/** + * \brief HiFi5 enabled volume passthrough from 32 bit to 24/32 or 32 bit. + * \param[in,out] mod Pointer to struct processing_module + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused) + */ +static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, + struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + int i, n, m; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + const int channels_count = audio_stream_get_channels(sink); + int samples = channels_count * frames; + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + while (samples) { + m = audio_stream_samples_without_wrap_s32(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s32(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples once */ + for (i = 0; i < n; i += 4) { + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + AE_SA32X2X2_IP(in_sample, in_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } +} +#endif /* CONFIG_FORMAT_S32LE */ + +#if CONFIG_FORMAT_S16LE +/** + * \brief HiFi5 enabled volume processing from 16 bit to 16 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused) + */ +static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 volume, volume1, volume2, volume3; + ae_int32x2 out_temp, out_temp1; + ae_int16x4 in_sample, in_sample1; + ae_int16x4 out_sample, out_sample1; + int i, n, m; + ae_int32x4 *buf; + ae_int32x4 *buf_end; + ae_int32x4 *vol; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int16x8 *in = (ae_int16x8 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int16x8 *out = (ae_int16x8 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + + /** to ensure the adsress is 16-byte aligned and avoid risk of + * error loading of volume gain while the cd->vol would be set + * as circular buffer + */ + if (cd->copy_gain) + vol_store_gain(cd, channels_count); + + buf = (ae_int32x4 *)cd->vol; + buf_end = (ae_int32x4 *)(cd->vol + channels_count * 4); + vol = buf; + + /* Set buf as circular buffer */ + AE_SETCBEGIN0(buf); + AE_SETCEND0(buf_end); + + while (samples) { + m = audio_stream_samples_without_wrap_s16(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s16(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + for (i = 0; i < n; i += 8) { + /* load 4x2 volume gain */ + AE_L32X2X2_XC(volume, volume1, vol, inc); + + AE_L32X2X2_XC(volume2, volume3, vol, inc); + +#if COMP_VOLUME_Q8_16 + /* Q8.16 to Q9.23 */ + volume = AE_SLAI32S(volume, 7); + volume1 = AE_SLAI32S(volume1, 7); + + volume2 = AE_SLAI32S(volume2, 7); + volume3 = AE_SLAI32S(volume3, 7); +#elif COMP_VOLUME_Q1_23 + /* No need to shift, Q1.23 is OK as such */ +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" +#endif + /* Load the input sample */ + AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); + + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume, volume1, in_sample); + /* Q9.23 to Q1.31 */ + out_temp = AE_SLAI32S(out_temp, 8); + out_temp1 = AE_SLAI32S(out_temp1, 8); + out_sample = AE_ROUND16X4F32SSYM(out_temp, out_temp1); + + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume2, volume3, in_sample1); + /* Q9.23 to Q1.31 */ + out_temp = AE_SLAI32S(out_temp, 8); + out_temp1 = AE_SLAI32S(out_temp1, 8); + /* store the output */ + out_sample1 = AE_ROUND16X4F32SSYM(out_temp, out_temp1); + + AE_SA16X4X2_IP(out_sample, out_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + bsource->consumed += VOL_S16_SAMPLES_TO_BYTES(n); + bsink->size += VOL_S16_SAMPLES_TO_BYTES(n); + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } +} + +/** + * \brief HiFi5 enabled volume passthrough from 16 bit to 16 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused) + */ +static void vol_passthrough_s16_to_s16(struct processing_module *mod, + struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int16x4 in_sample, in_sample1; + int i, n, m; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int16x8 *in = (ae_int16x8 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int16x8 *out = (ae_int16x8 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + int samples = channels_count * frames; + + bsource->consumed += VOL_S16_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S16_SAMPLES_TO_BYTES(samples); + while (samples) { + m = audio_stream_samples_without_wrap_s16(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s16(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + for (i = 0; i < n; i += 8) { + /* Load the input sample */ + AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); + AE_SA16X4X2_IP(in_sample, in_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } +} +#endif /* CONFIG_FORMAT_S16LE */ +const struct comp_func_map volume_func_map[] = { +#if CONFIG_FORMAT_S16LE + { SOF_IPC_FRAME_S16_LE, vol_s16_to_s16, vol_passthrough_s16_to_s16}, +#endif +#if CONFIG_FORMAT_S24LE + { SOF_IPC_FRAME_S24_4LE, vol_s24_to_s24_s32, vol_passthrough_s24_to_s24_s32}, +#endif +#if CONFIG_FORMAT_S32LE + { SOF_IPC_FRAME_S32_LE, vol_s32_to_s24_s32, vol_passthrough_s32_to_s24_s32}, +#endif +}; + +const size_t volume_func_count = ARRAY_SIZE(volume_func_map); +#endif +#endif diff --git a/src/audio/volume/volume_hifi5_with_peakvol.c b/src/audio/volume/volume_hifi5_with_peakvol.c new file mode 100644 index 000000000000..13316533bf83 --- /dev/null +++ b/src/audio/volume/volume_hifi5_with_peakvol.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2024 Intel Corporation. All rights reserved. +// +// Author: Andrula Song + +/** + * \file + * \brief Volume HIFI5 processing implementation with peak volume detection + * \authors Andrula Song + */ + +#include +#include +#include +#include +#include +#include + +LOG_MODULE_DECLARE(volume, CONFIG_SOF_LOG_LEVEL); + +#include "volume.h" + +#if SOF_USE_HIFI(5, VOLUME) + +#if CONFIG_COMP_PEAK_VOL +#include + +static inline void vol_store_gain(struct vol_data *cd, const int channels_count) +{ + int32_t i; + + /* using for loop instead of memcpy_s(), because for loop costs less cycles */ + for (i = 0; i < channels_count; i++) { + cd->vol[i] = cd->volume[i]; + cd->vol[i + channels_count * 1] = cd->volume[i]; + cd->vol[i + channels_count * 2] = cd->volume[i]; + cd->vol[i + channels_count * 3] = cd->volume[i]; + } + cd->copy_gain = false; +} + +#if CONFIG_FORMAT_S24LE +/** + * \brief HiFi5 enabled volume processing from 24/32 bit to 24/32 or 32 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment + */ +static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + ae_int32x2 out_sample, out_sample1; + ae_int32x2 volume, volume1; + int i, n, m; + ae_int32x4 *vol; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x2 temp, temp1; + ae_int32x4 *peakvol = (ae_int32x4 *)cd->peak_vol; + + /* Set peakvol(which stores the peak volume data four times) as circular buffer */ + AE_SETCBEGIN1(cd->peak_vol); + AE_SETCEND1(cd->peak_vol + channels_count * 4); + + /** to ensure the adsress is 16-byte aligned and avoid risk of + * error loading of volume gain while the cd->vol would be set + * as circular buffer + */ + if (cd->copy_gain) + vol_store_gain(cd, channels_count); + + vol = (ae_int32x4 *)cd->vol; + /* Set buf who stores the volume gain data as circular buffer */ + AE_SETCBEGIN0(vol); + AE_SETCEND0(cd->vol + channels_count * 4); + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s32(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s16(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples per loop */ + for (i = 0; i < n; i += 4) { + /* Load the volume value */ + AE_L32X2X2_XC(volume, volume1, vol, inc); + + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + /* calculate the peak volume*/ + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(in_sample, temp); + temp1 = AE_MAXABS32S(in_sample1, temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + /* Multiply the input sample */ +#if COMP_VOLUME_Q8_16 + AE_MULF2P32X4RS(out_sample, out_sample1, AE_SLAI32S(volume, 7), + AE_SLAI32S(volume1, 7), + AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); +#elif COMP_VOLUME_Q1_23 + AE_MULF2P32X4RS(out_sample, out_sample1, volume, volume1, + AE_SLAI32(in_sample, 8), + AE_SLAI32(in_sample1, 8)); +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" +#endif + + /* Shift for S24_LE */ + out_sample = AE_SLAI32S(out_sample, 8); + out_sample = AE_SRAI32(out_sample, 8); + out_sample1 = AE_SLAI32S(out_sample1, 8); + out_sample1 = AE_SRAI32(out_sample1, 8); + + /* Store the output sample */ + AE_SA32X2X2_IP(out_sample, out_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } + + for (i = 0; i < channels_count; i++) { + m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]); + m = MAX(m, cd->peak_vol[i + channels_count * 2]); + m = MAX(m, cd->peak_vol[i + channels_count * 3]); + cd->peak_regs.peak_meter[i] = m << (attenuation + PEAK_24S_32C_ADJUST); + } +} + +/** + * \brief HiFi5 enabled volume passthrough from 24/32 bit to 24/32 or 32 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment + */ +static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, + struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + int i, n, m; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x2 temp, temp1; + ae_int32x4 *peakvol = (ae_int32x4 *)cd->peak_vol; + + /* Set peakvol(which stores the peak volume data four times) as circular buffer */ + AE_SETCBEGIN1(cd->peak_vol); + AE_SETCEND1(cd->peak_vol + channels_count * 4); + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s32(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s16(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples per loop */ + for (i = 0; i < n; i += 4) { + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + /* calculate the peak volume*/ + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(in_sample, temp); + temp1 = AE_MAXABS32S(in_sample1, temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + /* Store the output sample */ + AE_SA32X2X2_IP(in_sample, in_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } + for (i = 0; i < channels_count; i++) { + m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]); + m = MAX(m, cd->peak_vol[i + channels_count * 2]); + m = MAX(m, cd->peak_vol[i + channels_count * 3]); + cd->peak_regs.peak_meter[i] = m << (attenuation + PEAK_24S_32C_ADJUST); + } +} +#endif /* CONFIG_FORMAT_S24LE */ + +#if CONFIG_FORMAT_S32LE +/** + * \brief HiFi5 enabled volume processing from 32 bit to 24/32 or 32 bit. + * \param[in,out] mod Pointer to struct processing_module + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment + */ +static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + ae_int32x2 out_sample, out_sample1; + ae_int32x2 volume, volume1; + int i, n, m; + ae_int64 mult0; + ae_int64 mult1; + ae_int32x4 *buf; + ae_int32x4 *buf_end; + ae_int32x4 *vol; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + ae_int32x2 temp, temp1; + ae_int32x4 *peakvol = (ae_int32x4 *)cd->peak_vol; + + /* Set peakvol(which stores the peak volume data four times) as circular buffer */ + AE_SETCBEGIN1(cd->peak_vol); + AE_SETCEND1(cd->peak_vol + channels_count * 4); + + /** to ensure the address is 16-byte aligned and avoid risk of + * error loading of volume gain while the cd->vol would be set + * as circular buffer + */ + if (cd->copy_gain) + vol_store_gain(cd, channels_count); + + buf = (ae_int32x4 *)cd->vol; + buf_end = (ae_int32x4 *)(cd->vol + channels_count * 4); + vol = buf; + /* Set buf who stores the volume gain data as circular buffer */ + AE_SETCBEGIN0(buf); + AE_SETCEND0(buf_end); + + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s32(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s32(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples per loop */ + for (i = 0; i < n; i += 4) { + /* Load the volume value */ + AE_L32X2X2_XC(volume, volume1, vol, inc); + + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + /* calculate the peak volume*/ + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(in_sample, temp); + temp1 = AE_MAXABS32S(in_sample1, temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); +#if COMP_VOLUME_Q8_16 + /* Q8.16 x Q1.31 << 1 -> Q9.48 */ + mult0 = AE_MULF32S_HH(volume, in_sample); + mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ + mult1 = AE_MULF32S_LL(volume, in_sample); + mult1 = AE_SRAI64(mult1, 1); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ + + mult0 = AE_MULF32S_HH(volume1, in_sample1); + mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ + mult1 = AE_MULF32S_LL(volume1, in_sample1); + mult1 = AE_SRAI64(mult1, 1); + out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ +#elif COMP_VOLUME_Q1_23 + /* Q1.23 x Q1.31 << 1 -> Q2.55 */ + mult0 = AE_MULF32S_HH(volume, in_sample); + mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ + mult1 = AE_MULF32S_LL(volume, in_sample); + mult1 = AE_SRAI64(mult1, 8); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ + + mult0 = AE_MULF32S_HH(volume1, in_sample1); + mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ + mult1 = AE_MULF32S_LL(volume1, in_sample1); + mult1 = AE_SRAI64(mult1, 8); + out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" +#endif + AE_SA32X2X2_IP(out_sample, out_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } + for (i = 0; i < channels_count; i++) { + m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]); + m = MAX(m, cd->peak_vol[i + channels_count * 2]); + m = MAX(m, cd->peak_vol[i + channels_count * 3]); + cd->peak_regs.peak_meter[i] = m << attenuation; + } +} + +/** + * \brief HiFi5 enabled volume passthrough from 32 bit to 24/32 or 32 bit. + * \param[in,out] mod Pointer to struct processing_module + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment + */ +static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, + struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 in_sample, in_sample1; + int i, n, m; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int32x4 *out = (ae_int32x4 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + ae_int32x2 temp, temp1; + ae_int32x4 *peakvol = (ae_int32x4 *)cd->peak_vol; + + /* Set peakvol(which stores the peak volume data four times) as circular buffer */ + AE_SETCBEGIN1(cd->peak_vol); + AE_SETCEND1(cd->peak_vol + channels_count * 4); + bsource->consumed += VOL_S32_SAMPLES_TO_BYTES(samples); + bsink->size += VOL_S32_SAMPLES_TO_BYTES(samples); + + while (samples) { + m = audio_stream_samples_without_wrap_s32(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s32(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + /* process four continuous samples once */ + for (i = 0; i < n; i += 4) { + /* Load the input sample */ + AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); + /* calculate the peak volume*/ + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(in_sample, temp); + temp1 = AE_MAXABS32S(in_sample1, temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + + AE_SA32X2X2_IP(in_sample, in_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + } + for (i = 0; i < channels_count; i++) { + m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]); + m = MAX(m, cd->peak_vol[i + channels_count * 2]); + m = MAX(m, cd->peak_vol[i + channels_count * 3]); + cd->peak_regs.peak_meter[i] = m << attenuation; + } +} +#endif /* CONFIG_FORMAT_S32LE */ + +#if CONFIG_FORMAT_S16LE +/** + * \brief HiFi5 enabled volume processing from 16 bit to 16 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused for 16bit) + */ +static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int32x2 volume, volume1, volume2, volume3; + ae_int32x2 out_temp, out_temp1; + ae_int16x4 in_sample, in_sample1; + ae_int16x4 out_sample, out_sample1; + int i, n, m; + ae_int32x4 *buf; + ae_int32x4 *buf_end; + ae_int32x4 *vol; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int16x8 *in = (ae_int16x8 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int16x8 *out = (ae_int16x8 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x2 temp, temp1; + ae_int32x4 *peakvol = (ae_int32x4 *)cd->peak_vol; + + /* Set peakvol(which stores the peak volume data 4 times) as circular buffer */ + AE_SETCBEGIN1(cd->peak_vol); + AE_SETCEND1(cd->peak_vol + channels_count * 4); + + /** to ensure the adsress is 8-byte aligned and avoid risk of + * error loading of volume gain while the cd->vol would be set + * as circular buffer + */ + if (cd->copy_gain) + vol_store_gain(cd, channels_count); + + buf = (ae_int32x4 *)cd->vol; + buf_end = (ae_int32x4 *)(cd->vol + channels_count * 4); + vol = buf; + + /* Set buf as circular buffer */ + AE_SETCBEGIN0(buf); + AE_SETCEND0(buf_end); + + while (samples) { + m = audio_stream_samples_without_wrap_s16(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s16(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + for (i = 0; i < n; i += 8) { + /* load volume gain */ + AE_L32X2X2_XC(volume, volume1, vol, inc); + AE_L32X2X2_XC(volume2, volume3, vol, inc); + +#if COMP_VOLUME_Q8_16 + /* Q8.16 to Q9.23 */ + volume = AE_SLAI32S(volume, 7); + volume1 = AE_SLAI32S(volume1, 7); + volume2 = AE_SLAI32S(volume2, 7); + volume3 = AE_SLAI32S(volume3, 7); +#elif COMP_VOLUME_Q1_23 + /* No need to shift, Q1.23 is OK as such */ +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" +#endif + /* Load the input sample */ + AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); + /* calculate the peak volume*/ + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(AE_SEXT32X2D16_32(in_sample), temp); + temp1 = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample), temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(AE_SEXT32X2D16_32(in_sample1), temp); + temp1 = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample1), temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + + /* Multiply the input sample */ + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume, volume1, in_sample); + /* Q9.23 to Q1.31 */ + out_temp = AE_SLAI32S(out_temp, 8); + out_temp1 = AE_SLAI32S(out_temp1, 8); + out_sample = AE_ROUND16X4F32SSYM(out_temp, out_temp1); + + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume2, volume3, in_sample1); + /* Q9.23 to Q1.31 */ + out_temp = AE_SLAI32S(out_temp, 8); + out_temp1 = AE_SLAI32S(out_temp1, 8); + /* store the output */ + out_sample1 = AE_ROUND16X4F32SSYM(out_temp, out_temp1); + + AE_SA16X4X2_IP(out_sample, out_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + bsource->consumed += VOL_S16_SAMPLES_TO_BYTES(n); + bsink->size += VOL_S16_SAMPLES_TO_BYTES(n); + } + for (i = 0; i < channels_count; i++) { + m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]); + m = MAX(m, cd->peak_vol[i + channels_count * 2]); + m = MAX(m, cd->peak_vol[i + channels_count * 3]); + cd->peak_regs.peak_meter[i] = m << PEAK_16S_32C_ADJUST; + } +} + +/** + * \brief HiFi5 enabled volume passthrough from 16 bit to 16 bit. + * \param[in,out] dev Volume base component device. + * \param[in,out] sink Destination buffer. + * \param[in,out] source Input buffer. + * \param[in] frames Number of frames to process. + * \param[in] attenuation factor for peakmeter adjustment (unused for 16bit) + */ +static void vol_passthrough_s16_to_s16(struct processing_module *mod, + struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, uint32_t frames, + uint32_t attenuation) +{ + struct vol_data *cd = module_get_private_data(mod); + struct audio_stream *source = bsource->data; + struct audio_stream *sink = bsink->data; + ae_int16x4 in_sample, in_sample1; + int i, n, m; + ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 outu = AE_ZALIGN128(); + ae_int16x8 *in = (ae_int16x8 *)audio_stream_wrap(source, + (char *)audio_stream_get_rptr(source) + + bsource->consumed); + ae_int16x8 *out = (ae_int16x8 *)audio_stream_wrap(sink, + (char *)audio_stream_get_wptr(sink) + + bsink->size); + const int channels_count = audio_stream_get_channels(sink); + const int inc = sizeof(ae_int32x4); + int samples = channels_count * frames; + ae_int32x2 temp, temp1; + ae_int32x4 *peakvol = (ae_int32x4 *)cd->peak_vol; + + /* Set peakvol(which stores the peak volume data 4 times) as circular buffer */ + AE_SETCBEGIN1(cd->peak_vol); + AE_SETCEND1(cd->peak_vol + channels_count * 4); + + while (samples) { + m = audio_stream_samples_without_wrap_s16(source, in); + n = MIN(m, samples); + m = audio_stream_samples_without_wrap_s16(sink, out); + n = MIN(m, n); + inu = AE_LA128_PP(in); + for (i = 0; i < n; i += 8) { + /* Load the input sample */ + AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); + /* calculate the peak volume*/ + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(AE_SEXT32X2D16_32(in_sample), temp); + temp1 = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample), temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + + AE_L32X2X2_XC1(temp, temp1, peakvol, 0); + temp = AE_MAXABS32S(AE_SEXT32X2D16_32(in_sample1), temp); + temp1 = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample1), temp1); + AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + + /* store the output */ + AE_SA16X4X2_IP(in_sample, in_sample1, outu, out); + } + AE_SA128POS_FP(outu, out); + samples -= n; + in = audio_stream_wrap(source, in); + out = audio_stream_wrap(sink, out); + bsource->consumed += VOL_S16_SAMPLES_TO_BYTES(n); + bsink->size += VOL_S16_SAMPLES_TO_BYTES(n); + } + for (i = 0; i < channels_count; i++) { + m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]); + m = MAX(m, cd->peak_vol[i + channels_count * 2]); + m = MAX(m, cd->peak_vol[i + channels_count * 3]); + cd->peak_regs.peak_meter[i] = m << PEAK_16S_32C_ADJUST; + } +} +#endif /* CONFIG_FORMAT_S16LE */ + +const struct comp_func_map volume_func_map[] = { +#if CONFIG_FORMAT_S16LE + { SOF_IPC_FRAME_S16_LE, vol_s16_to_s16, vol_passthrough_s16_to_s16}, +#endif +#if CONFIG_FORMAT_S24LE + { SOF_IPC_FRAME_S24_4LE, vol_s24_to_s24_s32, vol_passthrough_s24_to_s24_s32}, +#endif +#if CONFIG_FORMAT_S32LE + { SOF_IPC_FRAME_S32_LE, vol_s32_to_s24_s32, vol_passthrough_s32_to_s24_s32}, +#endif +}; + +const size_t volume_func_count = ARRAY_SIZE(volume_func_map); +#endif +#endif diff --git a/src/include/sof/common.h b/src/include/sof/common.h index f07df7c85e4d..2a39523df645 100644 --- a/src/include/sof/common.h +++ b/src/include/sof/common.h @@ -206,10 +206,16 @@ // IS_ENABLED() above. # if XCHAL_HAVE_HIFI5 # define SOF_MAX_XCHAL_HIFI 5 +# define SOF_FRAME_BYTE_ALIGN 16 +# define SOF_FRAME_BYTE_ALIGN_6CH 16 # elif XCHAL_HAVE_HIFI4 # define SOF_MAX_XCHAL_HIFI 4 -# elif XCHAL_HAVE_HIFI3 +# define SOF_FRAME_BYTE_ALIGN 8 +# define SOF_FRAME_BYTE_ALIGN_6CH 16 +## elif XCHAL_HAVE_HIFI3 # define SOF_MAX_XCHAL_HIFI 3 +# define SOF_FRAME_BYTE_ALIGN 8 +# define SOF_FRAME_BYTE_ALIGN_6CH 16 # else # define SOF_MAX_XCHAL_HIFI NONE # endif diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt index 9a84d044ffaa..0150563913b1 100644 --- a/zephyr/CMakeLists.txt +++ b/zephyr/CMakeLists.txt @@ -775,9 +775,11 @@ if(CONFIG_COMP_VOLUME STREQUAL "m") add_dependencies(app volume) elseif(CONFIG_COMP_VOLUME) zephyr_library_sources( + ${SOF_AUDIO_PATH}/volume/volume_hifi5.c ${SOF_AUDIO_PATH}/volume/volume_hifi4.c ${SOF_AUDIO_PATH}/volume/volume_hifi3.c ${SOF_AUDIO_PATH}/volume/volume_generic.c + ${SOF_AUDIO_PATH}/volume/volume_hifi5_with_peakvol.c ${SOF_AUDIO_PATH}/volume/volume_hifi4_with_peakvol.c ${SOF_AUDIO_PATH}/volume/volume_hifi3_with_peakvol.c ${SOF_AUDIO_PATH}/volume/volume_generic_with_peakvol.c