Skip to content

Commit

Permalink
fix the handling of the (audio-) volume (SSE2/AVX)
Browse files Browse the repository at this point in the history
  • Loading branch information
pionere committed Sep 1, 2024
1 parent d5e4c2b commit 912399e
Showing 1 changed file with 116 additions and 47 deletions.
163 changes: 116 additions & 47 deletions 3rdParty/SDL2_mixer/src/effect_position.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ static void SDLCALL _Eff_position_u16lsb_c6(int chan, void *stream, int len, voi
#ifdef FULL // FIX_EFF
static void SDLCALL _Eff_position_s16lsb(int chan, void *stream, int len, void *udata)
#else
//static_assert(SDL_MAX_SINT16 * MIX_MAX_POS_EFFECT * MIX_MAX_VOLUME <= SDL_MAX_SINT32, "Volume might overflow when the effects are calculated.");
SDL_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_MIX_MAX_VOLUME, SDL_MAX_SINT16 * MIX_MAX_POS_EFFECT * MIX_MAX_VOLUME <= SDL_MAX_SINT32);
#define ADJUST_SIDE_VOLUME(s, v) (s = (s*v)/(MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT))
static SDL_bool _Eff_position_s16lsb(void* stream, unsigned len, void* udata)
#endif
Expand All @@ -765,9 +765,10 @@ static SDL_bool _Eff_position_s16lsb(void* stream, unsigned len, void* udata)
//if (left_f < 1.0f / SDL_MAX_SINT16 && right_f < 1.0f / SDL_MAX_SINT16)
// return SDL_FALSE;
int voll, volr;
const int left = channel->effect.left_vol * channel->volume;
const int right = channel->effect.right_vol * channel->volume;
if (left == 0 && right == 0)
const int volume = ((Mix_Channel*)udata)->volume;
const int left = channel->effect.left_vol * volume;
const int right = channel->effect.right_vol * volume;
if (volume == 0)
return SDL_FALSE;
#endif
#if 0
Expand Down Expand Up @@ -823,26 +824,43 @@ static SDL_bool SDL_TARGETING("sse2") _Eff_position_s16lsb_SSE2(void* stream, un
/* 16 signed bits (lsb) * 2 channels. */
Sint16* ptr = (Sint16*)stream;
Mix_Channel* channel = (Mix_Channel*)udata;
int left = channel->effect.left_vol * channel->volume;
int right = channel->effect.right_vol * channel->volume;
if (left == 0 && right == 0)
const int volume = ((Mix_Channel*)udata)->volume;
int left = channel->effect.left_vol * volume;
int right = channel->effect.right_vol * volume;
if (volume == 0)
return SDL_FALSE;

//static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_position_s16lsb_SSE2 expects MIX_MAX_VOLUME to be a power of 2.");
//static_assert((MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0, "_Eff_position_s16lsb_SSE2 expects MIX_MAX_POS_EFFECT to be a power of 2.");
//static_assert((MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 16), "_Eff_position_s16lsb_SSE2 expects MIX_MAX_VOLUME to be low.");
left *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
right *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);

__m128i ma = _mm_set1_epi16(left);
__m128i mb = _mm_set1_epi16(right);
__m128i mm = _mm_unpacklo_epi16(ma, mb);
#if 1
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_SSE2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0);
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_SSE2_MIX_MAX_POS_EFFECT, (MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0);
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_SSE2_low, (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 15));
left *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
right *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
if ((int16_t)left < 0) {
left = INT16_MAX;
}
if ((int16_t)right < 0) {
right = INT16_MAX;
}
#endif
__m128i mm = _mm_set1_epi32(left | (right << 16));

while (len >= 16) {
len -= 16;
__m128i aa = _mm_loadu_si128((const __m128i*)ptr);
__m128i bb = _mm_mulhi_epi16(aa, mm);
_mm_storeu_si128((__m128i*)ptr, bb);
#if 1
__m128i sm = _mm_mulhi_epi16(aa, mm);
__m128i cc = _mm_slli_epi16(sm, 1);
#else
__m128i lo = _mm_mullo_epi16(aa, mm);
__m128i hi = _mm_mulhi_epi16(aa, mm);
__m128i c0 = _mm_unpacklo_epi16(lo, hi);
__m128i c1 = _mm_unpackhi_epi16(lo, hi);
__m128i d0 = _mm_srai_epi32(c0, 10 + 4);
__m128i d1 = _mm_srai_epi32(c1, 10 + 4);
__m128i cc = _mm_packs_epi32(d0, d1);
#endif
_mm_storeu_si128((__m128i*)ptr, cc);
ptr += 8;
}

Expand All @@ -858,26 +876,47 @@ static SDL_bool SDL_TARGETING("avx2") _Eff_position_s16lsb_AVX2(void* stream, un
/* 16 signed bits (lsb) * 2 channels. */
Sint16* ptr = (Sint16*)stream;
Mix_Channel* channel = (Mix_Channel*)udata;
int left = channel->effect.left_vol * channel->volume;
int right = channel->effect.right_vol * channel->volume;
if (left == 0 && right == 0)
const int volume = ((Mix_Channel*)udata)->volume;
int left = channel->effect.left_vol * volume;
int right = channel->effect.right_vol * volume;
if (volume == 0)
return SDL_FALSE;

//static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_position_s16lsb_AVX2 expects MIX_MAX_VOLUME to be a power of 2.");
//static_assert((MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0, "_Eff_position_s16lsb_AVX2 expects MIX_MAX_POS_EFFECT to be a power of 2.");
//static_assert((MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 16), "_Eff_position_s16lsb_AVX2 expects MIX_MAX_VOLUME to be low.");
left *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
right *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
#if 1
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0);
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_POS_EFFECT, (MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0);
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_low, (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 15));
left *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
right *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT);
if ((int16_t)left < 0) {
left = INT16_MAX;
}
if ((int16_t)right < 0) {
right = INT16_MAX;
}
#else
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_VOLUME, MIX_MAX_VOLUME == (1 << 10));
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_POS_EFFECT, MIX_MAX_POS_EFFECT == (1 << 4));
#endif

__m256i ma = _mm256_set1_epi16(left);
__m256i mb = _mm256_set1_epi16(right);
__m256i mm = _mm256_unpacklo_epi16(ma, mb);
__m256i mm = _mm256_set1_epi32(left | (right << 16));

while (len >= 32) {
len -= 32;
__m256i aa = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bb = _mm256_mulhi_epi16(aa, mm);
_mm256_storeu_si256((__m256i*)ptr, bb);
#if 1
__m256i sm = _mm256_mulhi_epi16(aa, mm);
__m256i cc = _mm256_slli_epi16(sm, 1);
#else
__m256i lo = _mm256_mullo_epi16(aa, mm);
__m256i hi = _mm256_mulhi_epi16(aa, mm);
__m256i c0 = _mm256_unpacklo_epi16(lo, hi);
__m256i c1 = _mm256_unpackhi_epi16(lo, hi);
__m256i d0 = _mm256_srai_epi32(c0, 10 + 4);
__m256i d1 = _mm256_srai_epi32(c1, 10 + 4);
__m256i cc = _mm256_packs_epi32(d0, d1);
#endif
_mm256_storeu_si256((__m256i*)ptr, cc);
ptr += 16;
}

Expand Down Expand Up @@ -917,21 +956,36 @@ static SDL_bool SDL_TARGETING("sse2") _Eff_volume_s16lbs_SSE2(void* stream, unsi
if (volume == MIX_MAX_VOLUME)
return SDL_TRUE;

//static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_volume_s16lbs_SSE2 expects MIX_MAX_VOLUME to be a power of 2.");
//static_assert(MIX_MAX_VOLUME <= (1 << 16), "_Eff_volume_s16lbs_SSE2 expects MIX_MAX_VOLUME to be low.");
volume *= (1 << 16) / MIX_MAX_VOLUME;

#if 1
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_SSE2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0);
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_SSE2_low, MIX_MAX_VOLUME <= (1 << 15));
volume *= (1 << 15) / MIX_MAX_VOLUME;
#else
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_SSE2_MIX_MAX_VOLUME, MIX_MAX_VOLUME == (1 << 10));
#endif
__m128i mm = _mm_set1_epi16(volume);

while (len >= 16) {
len -= 16;
__m128i aa = _mm_loadu_si128((const __m128i*)ptr);
__m128i bb = _mm_mulhi_epi16(aa, mm);
_mm_storeu_si128((__m128i*)ptr, bb);
#if 1
__m128i sm = _mm_mulhi_epi16(aa, mm);
__m128i cc = _mm_slli_epi16(sm, 1);
#else
__m128i lo = _mm_mullo_epi16(aa, mm);
__m128i hi = _mm_mulhi_epi16(aa, mm);
__m128i c0 = _mm_unpacklo_epi16(lo, hi);
__m128i c1 = _mm_unpackhi_epi16(lo, hi);
__m128i d0 = _mm_srai_epi32(c0, 10);
__m128i d1 = _mm_srai_epi32(c1, 10);
__m128i cc = _mm_packs_epi32(d0, d1);
#endif
_mm_storeu_si128((__m128i*)ptr, cc);
ptr += 8;
}

volume /= (1 << 16) / MIX_MAX_VOLUME;
#if 1
volume /= (unsigned)((1 << 15) / MIX_MAX_VOLUME);
#endif
len /= sizeof(Sint16);
while (len--) {
vol = SDL_SwapLE16(*ptr);
Expand All @@ -953,21 +1007,36 @@ static SDL_bool SDL_TARGETING("avx2") _Eff_volume_s16lbs_AVX2(void* stream, unsi
if (volume == MIX_MAX_VOLUME)
return SDL_TRUE;

//static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_volume_s16lbs_AVX expects MIX_MAX_VOLUME to be a power of 2.");
//static_assert(MIX_MAX_VOLUME <= (1 << 16), "_Eff_volume_s16lbs_AVX expects MIX_MAX_VOLUME to be low.");
volume *= (1 << 16) / MIX_MAX_VOLUME;

#if 1
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_AVX2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0);
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_AVX2_low, MIX_MAX_VOLUME <= (1 << 15));
volume *= (1 << 15) / MIX_MAX_VOLUME;
#else
SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_AVX2_MIX_MAX_VOLUME, MIX_MAX_VOLUME == (1 << 10));
#endif
__m256i mm = _mm256_set1_epi16(volume);

while (len >= 32) {
len -= 32;
__m256i aa = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bb = _mm256_mulhi_epi16(aa, mm);
_mm256_storeu_si256((__m256i*)ptr, bb);
#if 1
__m256i sm = _mm256_mulhi_epi16(aa, mm);
__m256i cc = _mm256_slli_epi16(sm, 1);
#else
__m256i lo = _mm256_mullo_epi16(aa, mm);
__m256i hi = _mm256_mulhi_epi16(aa, mm);
__m256i c0 = _mm256_unpacklo_epi16(lo, hi);
__m256i c1 = _mm256_unpackhi_epi16(lo, hi);
__m256i d0 = _mm256_srai_epi32(c0, 10);
__m256i d1 = _mm256_srai_epi32(c1, 10);
__m256i cc = _mm256_packs_epi32(d0, d1);
#endif
_mm256_storeu_si256((__m256i*)ptr, cc);
ptr += 16;
}

volume /= (1 << 16) / MIX_MAX_VOLUME;
#if 1
volume /= (unsigned)((1 << 15) / MIX_MAX_VOLUME);
#endif
len /= sizeof(Sint16);
while (len--) {
vol = SDL_SwapLE16(*ptr);
Expand Down

0 comments on commit 912399e

Please sign in to comment.