From fafb99aa26b8372988367d2ef1f3d30bedf77dad Mon Sep 17 00:00:00 2001 From: Emmanuele Bassi Date: Tue, 23 Jan 2024 15:03:05 +0000 Subject: [PATCH 1/2] Add rounding operators for graphene_simd4f_t Both ceil() and floor(). Use the naive implementation, for the time being. --- doc/graphene-sections.txt | 2 + include/graphene-simd4f.h | 108 ++++++++++++++++++++++++++++++++++++++ src/graphene-simd4f.c | 44 ++++++++++++++++ tests/simd.c | 33 ++++++++++++ 4 files changed, 187 insertions(+) diff --git a/doc/graphene-sections.txt b/doc/graphene-sections.txt index a38cd87..bb85ba6 100644 --- a/doc/graphene-sections.txt +++ b/doc/graphene-sections.txt @@ -439,6 +439,8 @@ graphene_simd4f_is_zero4 graphene_simd4f_is_zero3 graphene_simd4f_is_zero2 graphene_simd4f_interpolate +graphene_simd4f_ceil +graphene_simd4f_floor graphene_simd4f_union_t graphene_simd4i_union_t diff --git a/include/graphene-simd4f.h b/include/graphene-simd4f.h index ea39062..285a561 100644 --- a/include/graphene-simd4f.h +++ b/include/graphene-simd4f.h @@ -174,6 +174,11 @@ bool graphene_simd4f_cmp_gt (const graphene_simd4f_t GRAPHENE_AVAILABLE_IN_1_0 graphene_simd4f_t graphene_simd4f_neg (const graphene_simd4f_t s); +GRAPHENE_AVAILABLE_IN_1_12 +graphene_simd4f_t graphene_simd4f_ceil (const graphene_simd4f_t s); +GRAPHENE_AVAILABLE_IN_1_12 +graphene_simd4f_t graphene_simd4f_floor (const graphene_simd4f_t s); + #if !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_SSE) /* SSE2 implementation of SIMD 4f */ @@ -471,6 +476,23 @@ typedef GRAPHENE_ALIGN16 union { (graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__mask.f)); \ })) +# define graphene_simd4f_ceil(s) \ + (__extension__ ({ \ + const float __ceil_x = ceilf (graphene_simd4f_get_x ((s))); \ + const float __ceil_y = ceilf (graphene_simd4f_get_y ((s))); \ + const float __ceil_z = ceilf (graphene_simd4f_get_z ((s))); \ + const float __ceil_w = ceilf (graphene_simd4f_get_w ((s))); \ + (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \ + })) +# define graphene_simd4f_floor(s) \ + (__extension__ ({ \ + const float __floor_x = floorf (graphene_simd4f_get_x ((s))); \ + const float __floor_y = floorf (graphene_simd4f_get_y ((s))); \ + const float __floor_z = floorf (graphene_simd4f_get_z ((s))); \ + const float __floor_w = floorf (graphene_simd4f_get_w ((s))); \ + (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \ + })) + /* On MSVC, we use static inlines */ # elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */ @@ -771,6 +793,29 @@ _simd4f_neg (const graphene_simd4f_t s) return _mm_xor_ps (s, _mm_load_ps (__mask.f)); } +# define graphene_simd4f_ceil(s) _simd4f_ceil(s) +# define graphene_simd4f_floor(s) _simd4f_floor(s) + +static inline graphene_simd4f_t +_simd4f_ceil (const graphene_simd4f_t s) +{ + const float __ceil_x = ceilf (graphene_simd4f_get_x (s)); + const float __ceil_y = ceilf (graphene_simd4f_get_y (s)); + const float __ceil_z = ceilf (graphene_simd4f_get_z (s)); + const float __ceil_w = ceilf (graphene_simd4f_get_w (s)); + return graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); +} + +static inline graphene_simd4f_t +_simd4f_floor (const graphene_simd4f_t s) +{ + const float __floor_x = floorf (graphene_simd4f_get_x (s)); + const float __floor_y = floorf (graphene_simd4f_get_y (s)); + const float __floor_z = floorf (graphene_simd4f_get_z (s)); + const float __floor_w = floorf (graphene_simd4f_get_w (s)); + return graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); +} + #else /* SSE intrinsics-not GCC or Visual Studio */ # error "Need GCC-compatible or Visual Studio compiler for SSE extensions." @@ -1076,6 +1121,24 @@ typedef int graphene_simd4i_t __attribute__((vector_size (16))); graphene_simd4f_mul (__s, __minus_one); \ })) +# define graphene_simd4f_ceil(s) \ + (__extension__ ({ \ + const float __ceil_x = ceilf (graphene_simd4f_get_x ((s))); \ + const float __ceil_y = ceilf (graphene_simd4f_get_y ((s))); \ + const float __ceil_z = ceilf (graphene_simd4f_get_z ((s))); \ + const float __ceil_w = ceilf (graphene_simd4f_get_w ((s))); \ + (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \ + })) + +# define graphene_simd4f_floor(s) \ + (__extension__ ({ \ + const float __floor_x = floorf (graphene_simd4f_get_x ((s))); \ + const float __floor_y = floorf (graphene_simd4f_get_y ((s))); \ + const float __floor_z = floorf (graphene_simd4f_get_z ((s))); \ + const float __floor_w = floorf (graphene_simd4f_get_w ((s))); \ + (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \ + })) + #elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_ARM_NEON) /* ARM Neon implementation of SIMD4f */ @@ -1398,6 +1461,24 @@ typedef float32x2_t graphene_simd2f_t; (graphene_simd4f_t) vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask)); \ })) +# define graphene_simd4f_ceil(s) \ + (__extension__ ({ \ + const float __ceil_x = ceilf (graphene_simd4f_get_x ((s))); \ + const float __ceil_y = ceilf (graphene_simd4f_get_y ((s))); \ + const float __ceil_z = ceilf (graphene_simd4f_get_z ((s))); \ + const float __ceil_w = ceilf (graphene_simd4f_get_w ((s))); \ + (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \ + })) + +# define graphene_simd4f_floor(s) \ + (__extension__ ({ \ + const float __floor_x = floorf (graphene_simd4f_get_x ((s))); \ + const float __floor_y = floorf (graphene_simd4f_get_y ((s))); \ + const float __floor_z = floorf (graphene_simd4f_get_z ((s))); \ + const float __floor_w = floorf (graphene_simd4f_get_w ((s))); \ + (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \ + })) + #elif defined _MSC_VER /* Visual Studio ARM */ # define graphene_simd4f_init(x,y,z,w) _simd4f_init(x,y,z,w) @@ -1717,6 +1798,29 @@ _simd4f_neg (const graphene_simd4f_t s) return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask)); } +# define graphene_simd4f_ceil(s) _simd4f_ceil(s) +# define graphene_simd4f_floor(s) _simd4f_floor(s) + +static inline graphene_simd4f_t +_simd4f_ceil (const graphene_simd4f_t s) +{ + const float __ceil_x = ceilf (graphene_simd4f_get_x (s)); + const float __ceil_y = ceilf (graphene_simd4f_get_y (s)); + const float __ceil_z = ceilf (graphene_simd4f_get_z (s)); + const float __ceil_w = ceilf (graphene_simd4f_get_w (s)); + return graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); +} + +static inline graphene_simd4f_t +_simd4f_floor (const graphene_simd4f_t s) +{ + const float __floor_x = floorf (graphene_simd4f_get_x (s)); + const float __floor_y = floorf (graphene_simd4f_get_y (s)); + const float __floor_z = floorf (graphene_simd4f_get_z (s)); + const float __floor_w = floorf (graphene_simd4f_get_w (s)); + return graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); +} + #else /* ARM NEON intrinsics-not GCC or Visual Studio */ # error "Need GCC-compatible or Visual Studio compiler for ARM NEON extensions." @@ -1829,6 +1933,10 @@ _simd4f_neg (const graphene_simd4f_t s) (graphene_simd4f_cmp_gt ((a), (b))) #define graphene_simd4f_neg(s) \ (graphene_simd4f_neg ((s))) +#define graphene_simd4f_ceil(s) \ + (graphene_simd4f_ceil ((s))) +#define graphene_simd4f_floor(s) \ + (graphene_simd4f_floor ((s))) #else # error "Unsupported simd4f implementation." diff --git a/src/graphene-simd4f.c b/src/graphene-simd4f.c index 816de00..00c545b 100644 --- a/src/graphene-simd4f.c +++ b/src/graphene-simd4f.c @@ -1041,6 +1041,38 @@ graphene_simd4f_t return graphene_simd4f_neg (s); } +/** + * graphene_simd4f_ceil: + * @s: a #graphene_simd4f_t + * + * Rounds each component of the vector @s up to the nearest integer value. + * + * Returns: the rounded up vector + * + * Since: 1.12 + */ +graphene_simd4f_t +(graphene_simd4f_ceil) (const graphene_simd4f_t s) +{ + return graphene_simd4f_ceil (s); +} + +/** + * graphene_simd4f_floor: + * @s: a #graphene_simd4f_t + * + * Rounds each component of the vector @s down to the nearest integer value. + * + * Returns: the rounded down vector + * + * Since: 1.12 + */ +graphene_simd4f_t +(graphene_simd4f_floor) (const graphene_simd4f_t s) +{ + return graphene_simd4f_floor (s); +} + #else /* GRAPHENE_USE_SCALAR */ graphene_simd4f_t @@ -1472,4 +1504,16 @@ graphene_simd4f_t return graphene_simd4f_init (-s.x, -s.y, -s.z, -s.w); } +graphene_simd4f_t +(graphene_simd4f_ceil) (const graphene_simd4f_t s) +{ + return graphene_simd4f_init (ceilf (s.x), ceilf (s.y), ceilf (s.z), ceilf (s.w)); +} + +graphene_simd4f_t +(graphene_simd4f_floor) (const graphene_simd4f_t s) +{ + return graphene_simd4f_init (floorf (s.x), floorf (s.y), floorf (s.z), floorf (s.w)); +} + #endif /* GRAPHENE_USE_SCALAR */ diff --git a/tests/simd.c b/tests/simd.c index 8c75041..0bafb30 100644 --- a/tests/simd.c +++ b/tests/simd.c @@ -320,6 +320,36 @@ simd_operators_reciprocal (void) NULL); } +static void +simd_operators_ceil (void) +{ + graphene_simd4f_t a, b, check; + + check = graphene_simd4f_init (2.0f, 3.0f, 4.0f, 5.0f); + a = graphene_simd4f_init (1.7f, 2.4f, 3.6f, 4.2f); + b = graphene_simd4f_ceil (a); + + mutest_expect ("ceil() to round up to the nearest integer", + mutest_bool_value (graphene_simd4f_cmp_eq (b, check)), + mutest_to_be_true, + NULL); +} + +static void +simd_operators_floor (void) +{ + graphene_simd4f_t a, b, check; + + check = graphene_simd4f_init (1.0f, 2.0f, 3.0f, 4.0f); + a = graphene_simd4f_init (1.7f, 2.4f, 3.6f, 4.2f); + b = graphene_simd4f_floor (a); + + mutest_expect ("floor() to round down to the nearest integer", + mutest_bool_value (graphene_simd4f_cmp_eq (b, check)), + mutest_to_be_true, + NULL); +} + static void simd_suite (void) { @@ -339,6 +369,9 @@ simd_suite (void) mutest_it ("can compute the maximum vector and scalar", simd_operators_max); mutest_it ("can compute the reciprocal of vector", simd_operators_reciprocal); + + mutest_it ("can round up vector components", simd_operators_ceil); + mutest_it ("can round down vector components", simd_operators_floor); } MUTEST_MAIN ( From 0959b6f659bd1db3f31da3c6b04e0378c14cbe86 Mon Sep 17 00:00:00 2001 From: Emmanuele Bassi Date: Tue, 23 Jan 2024 15:10:46 +0000 Subject: [PATCH 2/2] Add SSE4.1 rounding implementations SSE4.1 has intrinsics for ceil() and round() operators, so let's use them. --- include/graphene-simd4f.h | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/include/graphene-simd4f.h b/include/graphene-simd4f.h index 285a561..758343d 100644 --- a/include/graphene-simd4f.h +++ b/include/graphene-simd4f.h @@ -476,7 +476,17 @@ typedef GRAPHENE_ALIGN16 union { (graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__mask.f)); \ })) -# define graphene_simd4f_ceil(s) \ +# if defined(GRAPHENE_USE_SSE4_1) +# define graphene_simd4f_ceil(s) \ + (__extension__ ({ \ + (graphene_simd4f_t) _mm_ceil_ps ((s)); \ + })) +# define graphene_simd4f_floor(s) \ + (__extension__ ({ \ + (graphene_simd4f_t) _mm_floor_ps ((s)); \ + })) +# else +# define graphene_simd4f_ceil(s) \ (__extension__ ({ \ const float __ceil_x = ceilf (graphene_simd4f_get_x ((s))); \ const float __ceil_y = ceilf (graphene_simd4f_get_y ((s))); \ @@ -484,7 +494,7 @@ typedef GRAPHENE_ALIGN16 union { const float __ceil_w = ceilf (graphene_simd4f_get_w ((s))); \ (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \ })) -# define graphene_simd4f_floor(s) \ +# define graphene_simd4f_floor(s) \ (__extension__ ({ \ const float __floor_x = floorf (graphene_simd4f_get_x ((s))); \ const float __floor_y = floorf (graphene_simd4f_get_y ((s))); \ @@ -492,6 +502,7 @@ typedef GRAPHENE_ALIGN16 union { const float __floor_w = floorf (graphene_simd4f_get_w ((s))); \ (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \ })) +# endif /* On MSVC, we use static inlines */ # elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */ @@ -793,27 +804,35 @@ _simd4f_neg (const graphene_simd4f_t s) return _mm_xor_ps (s, _mm_load_ps (__mask.f)); } -# define graphene_simd4f_ceil(s) _simd4f_ceil(s) -# define graphene_simd4f_floor(s) _simd4f_floor(s) +#define graphene_simd4f_ceil(s) _simd4f_ceil(s) +#define graphene_simd4f_floor(s) _simd4f_floor(s) static inline graphene_simd4f_t _simd4f_ceil (const graphene_simd4f_t s) { +#if defined(GRAPHENE_USE_SSE4_1) + return _mm_ceil_ps (s); +#else const float __ceil_x = ceilf (graphene_simd4f_get_x (s)); const float __ceil_y = ceilf (graphene_simd4f_get_y (s)); const float __ceil_z = ceilf (graphene_simd4f_get_z (s)); const float __ceil_w = ceilf (graphene_simd4f_get_w (s)); return graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); +#endif } static inline graphene_simd4f_t _simd4f_floor (const graphene_simd4f_t s) { +#if defined(GRAPHENE_USE_SSE4_1) + return _mm_floor_ps (s); +#else const float __floor_x = floorf (graphene_simd4f_get_x (s)); const float __floor_y = floorf (graphene_simd4f_get_y (s)); const float __floor_z = floorf (graphene_simd4f_get_z (s)); const float __floor_w = floorf (graphene_simd4f_get_w (s)); return graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); +#endif } #else /* SSE intrinsics-not GCC or Visual Studio */