Skip to content

Commit

Permalink
SSE transform math: remove 2 unnecessary mask ops
Browse files Browse the repository at this point in the history
In 2 cases we were forming a vector from (a.xyz, b.w) but it turns out
the "junk" w component of a already contained the desired b.w value, so
we can just use a as-is.
  • Loading branch information
slipher committed Dec 5, 2024
1 parent a324945 commit cc3592e
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions src/engine/qcommon/q_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,8 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
__m128 sum2 = _mm_add_ps( sum1, sseSwizzle( sum1, ZWXY ) );
return sum2;
}

// returns 0 in w component if input w's are finite
inline __m128 sseCrossProduct( __m128 a, __m128 b ) {
__m128 a_yzx = sseSwizzle( a, YZXW );
__m128 b_yzx = sseSwizzle( b, YZXW );
Expand Down Expand Up @@ -1239,6 +1241,7 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
t = _mm_mul_ps( h, t );
return _mm_mul_ps( q, t );
}
// rotates (3-dimensional) vec. vec's w component is unchanged
inline __m128 sseQuatTransform( __m128 q, __m128 vec ) {
__m128 t, t2;
t = sseCrossProduct( q, vec );
Expand Down Expand Up @@ -1303,10 +1306,8 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
}
inline void TransAddRotationQuat( const quat_t quat, transform_t *t ) {
__m128 q = _mm_loadu_ps( quat );
__m128 transformed = sseQuatTransform( q, t->sseTransScale );
t->sseRot = sseQuatMul( q, t->sseRot );
t->sseTransScale = _mm_or_ps( _mm_and_ps( transformed, mask_XYZ0() ),
_mm_and_ps( t->sseTransScale, mask_000W() ) );
t->sseTransScale = sseQuatTransform( q, t->sseTransScale );
}
inline void TransInsScale( float factor, transform_t *t ) {
t->scale *= factor;
Expand Down Expand Up @@ -1337,8 +1338,6 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
__m128 bRot = b->sseRot;
__m128 bTS = b->sseTransScale;
__m128 tmp = sseQuatTransform( bRot, aTS );
tmp = _mm_or_ps( _mm_and_ps( tmp, mask_XYZ0() ),
_mm_and_ps( aTS, mask_000W() ) );
tmp = _mm_mul_ps( tmp, sseSwizzle( bTS, WWWW ) );
out->sseTransScale = _mm_add_ps( tmp, _mm_and_ps( bTS, mask_XYZ0() ) );
out->sseRot = sseQuatMul( bRot, aRot );
Expand Down

0 comments on commit cc3592e

Please sign in to comment.