diff --git a/3rdParty/SDL2_mixer/src/effect_position.c b/3rdParty/SDL2_mixer/src/effect_position.c index fbffe04ce6e..bebfbb5fff0 100644 --- a/3rdParty/SDL2_mixer/src/effect_position.c +++ b/3rdParty/SDL2_mixer/src/effect_position.c @@ -743,7 +743,7 @@ static void SDLCALL _Eff_position_u16lsb_c6(int chan, void *stream, int len, voi #ifdef FULL // FIX_EFF static void SDLCALL _Eff_position_s16lsb(int chan, void *stream, int len, void *udata) #else -//static_assert(SDL_MAX_SINT16 * MIX_MAX_POS_EFFECT * MIX_MAX_VOLUME <= SDL_MAX_SINT32, "Volume might overflow when the effects are calculated."); +SDL_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_MIX_MAX_VOLUME, SDL_MAX_SINT16 * MIX_MAX_POS_EFFECT * MIX_MAX_VOLUME <= SDL_MAX_SINT32); #define ADJUST_SIDE_VOLUME(s, v) (s = (s*v)/(MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT)) static SDL_bool _Eff_position_s16lsb(void* stream, unsigned len, void* udata) #endif @@ -765,9 +765,10 @@ static SDL_bool _Eff_position_s16lsb(void* stream, unsigned len, void* udata) //if (left_f < 1.0f / SDL_MAX_SINT16 && right_f < 1.0f / SDL_MAX_SINT16) // return SDL_FALSE; int voll, volr; - const int left = channel->effect.left_vol * channel->volume; - const int right = channel->effect.right_vol * channel->volume; - if (left == 0 && right == 0) + const int volume = ((Mix_Channel*)udata)->volume; + const int left = channel->effect.left_vol * volume; + const int right = channel->effect.right_vol * volume; + if (volume == 0) return SDL_FALSE; #endif #if 0 @@ -823,26 +824,43 @@ static SDL_bool SDL_TARGETING("sse2") _Eff_position_s16lsb_SSE2(void* stream, un /* 16 signed bits (lsb) * 2 channels. */ Sint16* ptr = (Sint16*)stream; Mix_Channel* channel = (Mix_Channel*)udata; - int left = channel->effect.left_vol * channel->volume; - int right = channel->effect.right_vol * channel->volume; - if (left == 0 && right == 0) + const int volume = ((Mix_Channel*)udata)->volume; + int left = channel->effect.left_vol * volume; + int right = channel->effect.right_vol * volume; + if (volume == 0) return SDL_FALSE; - //static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_position_s16lsb_SSE2 expects MIX_MAX_VOLUME to be a power of 2."); - //static_assert((MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0, "_Eff_position_s16lsb_SSE2 expects MIX_MAX_POS_EFFECT to be a power of 2."); - //static_assert((MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 16), "_Eff_position_s16lsb_SSE2 expects MIX_MAX_VOLUME to be low."); - left *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); - right *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); - - __m128i ma = _mm_set1_epi16(left); - __m128i mb = _mm_set1_epi16(right); - __m128i mm = _mm_unpacklo_epi16(ma, mb); +#if 1 + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_SSE2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_SSE2_MIX_MAX_POS_EFFECT, (MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_SSE2_low, (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 15)); + left *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); + right *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); + if ((int16_t)left < 0) { + left = INT16_MAX; + } + if ((int16_t)right < 0) { + right = INT16_MAX; + } +#endif + __m128i mm = _mm_set1_epi32(left | (right << 16)); while (len >= 16) { len -= 16; __m128i aa = _mm_loadu_si128((const __m128i*)ptr); - __m128i bb = _mm_mulhi_epi16(aa, mm); - _mm_storeu_si128((__m128i*)ptr, bb); +#if 1 + __m128i sm = _mm_mulhi_epi16(aa, mm); + __m128i cc = _mm_slli_epi16(sm, 1); +#else + __m128i lo = _mm_mullo_epi16(aa, mm); + __m128i hi = _mm_mulhi_epi16(aa, mm); + __m128i c0 = _mm_unpacklo_epi16(lo, hi); + __m128i c1 = _mm_unpackhi_epi16(lo, hi); + __m128i d0 = _mm_srai_epi32(c0, 10 + 4); + __m128i d1 = _mm_srai_epi32(c1, 10 + 4); + __m128i cc = _mm_packs_epi32(d0, d1); +#endif + _mm_storeu_si128((__m128i*)ptr, cc); ptr += 8; } @@ -858,26 +876,47 @@ static SDL_bool SDL_TARGETING("avx2") _Eff_position_s16lsb_AVX2(void* stream, un /* 16 signed bits (lsb) * 2 channels. */ Sint16* ptr = (Sint16*)stream; Mix_Channel* channel = (Mix_Channel*)udata; - int left = channel->effect.left_vol * channel->volume; - int right = channel->effect.right_vol * channel->volume; - if (left == 0 && right == 0) + const int volume = ((Mix_Channel*)udata)->volume; + int left = channel->effect.left_vol * volume; + int right = channel->effect.right_vol * volume; + if (volume == 0) return SDL_FALSE; - //static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_position_s16lsb_AVX2 expects MIX_MAX_VOLUME to be a power of 2."); - //static_assert((MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0, "_Eff_position_s16lsb_AVX2 expects MIX_MAX_POS_EFFECT to be a power of 2."); - //static_assert((MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 16), "_Eff_position_s16lsb_AVX2 expects MIX_MAX_VOLUME to be low."); - left *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); - right *= (1 << 16) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); +#if 1 + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_POS_EFFECT, (MIX_MAX_POS_EFFECT & (MIX_MAX_POS_EFFECT - 1)) == 0); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_low, (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT) <= (1 << 15)); + left *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); + right *= (1 << 15) / (MIX_MAX_VOLUME * MIX_MAX_POS_EFFECT); + if ((int16_t)left < 0) { + left = INT16_MAX; + } + if ((int16_t)right < 0) { + right = INT16_MAX; + } +#else + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_VOLUME, MIX_MAX_VOLUME == (1 << 10)); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_position_s16lsb_AVX2_MIX_MAX_POS_EFFECT, MIX_MAX_POS_EFFECT == (1 << 4)); +#endif - __m256i ma = _mm256_set1_epi16(left); - __m256i mb = _mm256_set1_epi16(right); - __m256i mm = _mm256_unpacklo_epi16(ma, mb); + __m256i mm = _mm256_set1_epi32(left | (right << 16)); while (len >= 32) { len -= 32; __m256i aa = _mm256_loadu_si256((const __m256i*)ptr); - __m256i bb = _mm256_mulhi_epi16(aa, mm); - _mm256_storeu_si256((__m256i*)ptr, bb); +#if 1 + __m256i sm = _mm256_mulhi_epi16(aa, mm); + __m256i cc = _mm256_slli_epi16(sm, 1); +#else + __m256i lo = _mm256_mullo_epi16(aa, mm); + __m256i hi = _mm256_mulhi_epi16(aa, mm); + __m256i c0 = _mm256_unpacklo_epi16(lo, hi); + __m256i c1 = _mm256_unpackhi_epi16(lo, hi); + __m256i d0 = _mm256_srai_epi32(c0, 10 + 4); + __m256i d1 = _mm256_srai_epi32(c1, 10 + 4); + __m256i cc = _mm256_packs_epi32(d0, d1); +#endif + _mm256_storeu_si256((__m256i*)ptr, cc); ptr += 16; } @@ -917,21 +956,36 @@ static SDL_bool SDL_TARGETING("sse2") _Eff_volume_s16lbs_SSE2(void* stream, unsi if (volume == MIX_MAX_VOLUME) return SDL_TRUE; - //static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_volume_s16lbs_SSE2 expects MIX_MAX_VOLUME to be a power of 2."); - //static_assert(MIX_MAX_VOLUME <= (1 << 16), "_Eff_volume_s16lbs_SSE2 expects MIX_MAX_VOLUME to be low."); - volume *= (1 << 16) / MIX_MAX_VOLUME; - +#if 1 + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_SSE2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_SSE2_low, MIX_MAX_VOLUME <= (1 << 15)); + volume *= (1 << 15) / MIX_MAX_VOLUME; +#else + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_SSE2_MIX_MAX_VOLUME, MIX_MAX_VOLUME == (1 << 10)); +#endif __m128i mm = _mm_set1_epi16(volume); while (len >= 16) { len -= 16; __m128i aa = _mm_loadu_si128((const __m128i*)ptr); - __m128i bb = _mm_mulhi_epi16(aa, mm); - _mm_storeu_si128((__m128i*)ptr, bb); +#if 1 + __m128i sm = _mm_mulhi_epi16(aa, mm); + __m128i cc = _mm_slli_epi16(sm, 1); +#else + __m128i lo = _mm_mullo_epi16(aa, mm); + __m128i hi = _mm_mulhi_epi16(aa, mm); + __m128i c0 = _mm_unpacklo_epi16(lo, hi); + __m128i c1 = _mm_unpackhi_epi16(lo, hi); + __m128i d0 = _mm_srai_epi32(c0, 10); + __m128i d1 = _mm_srai_epi32(c1, 10); + __m128i cc = _mm_packs_epi32(d0, d1); +#endif + _mm_storeu_si128((__m128i*)ptr, cc); ptr += 8; } - - volume /= (1 << 16) / MIX_MAX_VOLUME; +#if 1 + volume /= (unsigned)((1 << 15) / MIX_MAX_VOLUME); +#endif len /= sizeof(Sint16); while (len--) { vol = SDL_SwapLE16(*ptr); @@ -953,21 +1007,36 @@ static SDL_bool SDL_TARGETING("avx2") _Eff_volume_s16lbs_AVX2(void* stream, unsi if (volume == MIX_MAX_VOLUME) return SDL_TRUE; - //static_assert((MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0, "_Eff_volume_s16lbs_AVX expects MIX_MAX_VOLUME to be a power of 2."); - //static_assert(MIX_MAX_VOLUME <= (1 << 16), "_Eff_volume_s16lbs_AVX expects MIX_MAX_VOLUME to be low."); - volume *= (1 << 16) / MIX_MAX_VOLUME; - +#if 1 + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_AVX2_MIX_MAX_VOLUME, (MIX_MAX_VOLUME & (MIX_MAX_VOLUME - 1)) == 0); + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_AVX2_low, MIX_MAX_VOLUME <= (1 << 15)); + volume *= (1 << 15) / MIX_MAX_VOLUME; +#else + SDL_INLINE_COMPILE_TIME_ASSERT(_Eff_volume_s16lbs_AVX2_MIX_MAX_VOLUME, MIX_MAX_VOLUME == (1 << 10)); +#endif __m256i mm = _mm256_set1_epi16(volume); while (len >= 32) { len -= 32; __m256i aa = _mm256_loadu_si256((const __m256i*)ptr); - __m256i bb = _mm256_mulhi_epi16(aa, mm); - _mm256_storeu_si256((__m256i*)ptr, bb); +#if 1 + __m256i sm = _mm256_mulhi_epi16(aa, mm); + __m256i cc = _mm256_slli_epi16(sm, 1); +#else + __m256i lo = _mm256_mullo_epi16(aa, mm); + __m256i hi = _mm256_mulhi_epi16(aa, mm); + __m256i c0 = _mm256_unpacklo_epi16(lo, hi); + __m256i c1 = _mm256_unpackhi_epi16(lo, hi); + __m256i d0 = _mm256_srai_epi32(c0, 10); + __m256i d1 = _mm256_srai_epi32(c1, 10); + __m256i cc = _mm256_packs_epi32(d0, d1); +#endif + _mm256_storeu_si256((__m256i*)ptr, cc); ptr += 16; } - - volume /= (1 << 16) / MIX_MAX_VOLUME; +#if 1 + volume /= (unsigned)((1 << 15) / MIX_MAX_VOLUME); +#endif len /= sizeof(Sint16); while (len--) { vol = SDL_SwapLE16(*ptr);