Skip to content

Commit

Permalink
Add SSE4.1 rounding implementations
Browse files Browse the repository at this point in the history
SSE4.1 has intrinsics for the ceil() and floor() operators, so let's use
them.
  • Loading branch information
ebassi committed Jan 23, 2024
1 parent fafb99a commit 0959b6f
Showing 1 changed file with 23 additions and 4 deletions.
27 changes: 23 additions & 4 deletions include/graphene-simd4f.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,22 +476,33 @@ typedef GRAPHENE_ALIGN16 union {
(graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__mask.f)); \
}))

/*
 * Per-lane ceil and floor for the GCC-style (statement expression) macros.
 *
 * When GRAPHENE_USE_SSE4_1 is defined, both operations expand to the
 * _mm_ceil_ps() / _mm_floor_ps() intrinsics. Otherwise every lane is read
 * with graphene_simd4f_get_{x,y,z,w}(), rounded with ceilf() / floorf(),
 * and packed back with graphene_simd4f_init().
 *
 * Note: the fallback variants expand their argument once per lane, so `s`
 * should be free of side effects.
 */
# if defined(GRAPHENE_USE_SSE4_1)
#  define graphene_simd4f_ceil(s) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_ceil_ps ((s)); \
  }))
#  define graphene_simd4f_floor(s) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_floor_ps ((s)); \
  }))
# else
#  define graphene_simd4f_ceil(s) \
  (__extension__ ({ \
    const float __ceil_x = ceilf (graphene_simd4f_get_x ((s))); \
    const float __ceil_y = ceilf (graphene_simd4f_get_y ((s))); \
    const float __ceil_z = ceilf (graphene_simd4f_get_z ((s))); \
    const float __ceil_w = ceilf (graphene_simd4f_get_w ((s))); \
    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
  }))
#  define graphene_simd4f_floor(s) \
  (__extension__ ({ \
    const float __floor_x = floorf (graphene_simd4f_get_x ((s))); \
    const float __floor_y = floorf (graphene_simd4f_get_y ((s))); \
    const float __floor_z = floorf (graphene_simd4f_get_z ((s))); \
    const float __floor_w = floorf (graphene_simd4f_get_w ((s))); \
    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
  }))
# endif

/* On MSVC, we use static inlines */
# elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */
Expand Down Expand Up @@ -793,27 +804,35 @@ _simd4f_neg (const graphene_simd4f_t s)
return _mm_xor_ps (s, _mm_load_ps (__mask.f));
}

/* Public ceil/floor names for the MSVC build; each one forwards to the
 * static inline helper of the matching name (_simd4f_ceil / _simd4f_floor). */
# define graphene_simd4f_ceil(s) _simd4f_ceil(s)
# define graphene_simd4f_floor(s) _simd4f_floor(s)

/*
 * _simd4f_ceil:
 * @s: the vector to round
 *
 * Applies a ceiling operation to each of the four lanes of @s.
 *
 * Builds with GRAPHENE_USE_SSE4_1 defined use the _mm_ceil_ps() intrinsic;
 * all other builds round each lane with ceilf() and repack the results.
 *
 * Returns: the vector of rounded lanes
 */
static inline graphene_simd4f_t
_simd4f_ceil (const graphene_simd4f_t s)
{
#if !defined(GRAPHENE_USE_SSE4_1)
  /* Scalar fallback: unpack, round each lane, repack. */
  const float up_x = ceilf (graphene_simd4f_get_x (s));
  const float up_y = ceilf (graphene_simd4f_get_y (s));
  const float up_z = ceilf (graphene_simd4f_get_z (s));
  const float up_w = ceilf (graphene_simd4f_get_w (s));

  return graphene_simd4f_init (up_x, up_y, up_z, up_w);
#else
  return _mm_ceil_ps (s);
#endif
}

/*
 * _simd4f_floor:
 * @s: the vector to round
 *
 * Applies a floor operation to each of the four lanes of @s.
 *
 * Builds with GRAPHENE_USE_SSE4_1 defined use the _mm_floor_ps() intrinsic;
 * all other builds round each lane with floorf() and repack the results.
 *
 * Returns: the vector of rounded lanes
 */
static inline graphene_simd4f_t
_simd4f_floor (const graphene_simd4f_t s)
{
#if !defined(GRAPHENE_USE_SSE4_1)
  /* Scalar fallback: unpack, round each lane, repack. */
  const float down_x = floorf (graphene_simd4f_get_x (s));
  const float down_y = floorf (graphene_simd4f_get_y (s));
  const float down_z = floorf (graphene_simd4f_get_z (s));
  const float down_w = floorf (graphene_simd4f_get_w (s));

  return graphene_simd4f_init (down_x, down_y, down_z, down_w);
#else
  return _mm_floor_ps (s);
#endif
}

#else /* SSE intrinsics-not GCC or Visual Studio */
Expand Down

0 comments on commit 0959b6f

Please sign in to comment.