Skip to content

Commit

Permalink
arm: add support for detecting SIMD (NEON) availability (WIP)
Browse files Browse the repository at this point in the history
Requires getauxval(), which is available at least on Linux/Android
with recent versions of the libc, and is therefore guarded by a
configure-style macro (HAVE_GETAUXVAL).

On FreeBSD >= 12, Windows and NetBSD, the equivalent platform-specific
function is used instead.

While at it, consolidate the code to use the same externally
visible flag that is used in x86 for SSE2.
  • Loading branch information
carenas committed Dec 11, 2022
1 parent 88e0dcf commit cf0effb
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 17 deletions.
3 changes: 3 additions & 0 deletions API_CHANGES
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
This file is the short summary of the API changes:

10.12.2022 - Non-backward compatible
SLJIT_HAS_SSE2 renamed to SLJIT_HAS_SIMD

10.11.2022 - Non-backward compatible
Extract the pre/post update operations from
sljit_emit_mem to sljit_emit_mem_update
Expand Down
10 changes: 6 additions & 4 deletions sljit_src/sljitConfigInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,12 @@ typedef double sljit_f64;

#endif /* !SLJIT_FPU_UNALIGNED */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Auto detect SSE2 support using CPUID.
On 64 bit x86 cpus, sse2 must be present. */
#define SLJIT_DETECT_SSE2 1
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || \
(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Auto detect availability of SSE2 (using CPUID) or NEON.
Mandatory on 64 bit x86 or ARM (aarch64) cpus.
Optional on 32 bit cpus: SSE2 since Pentium 4, NEON since ARMv7. */
#define SLJIT_DETECT_SIMD 1
#endif

/*****************************************************************************************/
Expand Down
6 changes: 4 additions & 2 deletions sljit_src/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -632,9 +632,11 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
/* [Emulated] Prefetch instruction is available (emulated as a nop). */
#define SLJIT_HAS_PREFETCH 7

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
/* [Not emulated] SSE2 support is available on x86. */
#define SLJIT_HAS_SSE2 100
/* [Not emulated] NEON support is available on ARM. */
#define SLJIT_HAS_SIMD 100
#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
Expand Down
70 changes: 70 additions & 0 deletions sljit_src/sljitNativeARM_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,69 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
#endif
}

/* Bit tested in cpu_feature_list to report NEON support. The getauxval()
   path stores the raw AT_HWCAP value, so this presumably has to match the
   platform's NEON hwcap bit (TODO confirm). */
#define CPU_FEATURE_NEON (1UL << 12)
/* Cached CPU feature bits. Zero means no detection result has been stored
   yet, which makes callers run get_cpu_features(). */
static unsigned long cpu_feature_list;

#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
#include <sys/auxv.h>
#endif

#ifdef __NetBSD__
#include <sys/param.h>
#include <sys/sysctl.h>
#endif

/* Detects NEON availability and caches the result in cpu_feature_list.

   The result is computed in a local variable and stored exactly once at
   the end. When the NEON bit is not found (or the platform query fails)
   the value 1 is stored: it does not contain CPU_FEATURE_NEON, but being
   non-zero it marks the detection as done, so the platform query is not
   repeated on every call. */
static void get_cpu_features(void)
{
	unsigned long features = 0;

	if (cpu_feature_list)
		return;

#if defined(__ARM_ARCH) && __ARM_ARCH == 8
	/* TODO: confirm if optional with armv9 */
	/* mandatory for armv8 */
	features = CPU_FEATURE_NEON;
#elif defined(HAVE_GETAUXVAL)
	/* getauxval() returns 0 when the entry is not available. Checking
	   the returned value instead of errno avoids acting on a stale
	   ENOENT left behind by an unrelated earlier call. */
	features = getauxval(AT_HWCAP);
#elif defined(__OpenBSD__)
	/* required feature */
	features = CPU_FEATURE_NEON;
#elif defined(__APPLE__) && defined (__ARM_NEON__)
	features = CPU_FEATURE_NEON;
#elif defined(_WIN32)
	/* Fallback for SDK headers that predate this constant. */
#ifndef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#endif
	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		features = CPU_FEATURE_NEON;
#elif defined(__FreeBSD__) && defined(HAVE_ELF_AUX_INFO)
	{
		unsigned long buf = 0;

		/* elf_aux_info() returns 0 on success. */
		if (elf_aux_info(AT_HWCAP, (void *)&buf, (int)sizeof(buf)) == 0)
			features = buf;
	}
#elif defined(__NetBSD__) || defined(__FreeBSD__)
	{
		int neon = 0;
		size_t len = sizeof(neon);

		if (sysctlbyname("machdep.neon_present", &neon, &len, NULL, 0) == 0 && neon)
			features = CPU_FEATURE_NEON;
	}
#endif

	/* Never leave the cache at 0, otherwise every call would detect again. */
	cpu_feature_list = features ? features : 1;
}

#endif /* SLJIT_DETECT_SIMD */

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
Expand Down Expand Up @@ -973,6 +1036,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 2;
#endif

case SLJIT_HAS_SIMD:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) && \
(defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (!cpu_feature_list)
get_cpu_features();
#endif /* SLJIT_CONFIG_ARM_V7 && SLJIT_DETECT_SIMD */
return (cpu_feature_list & CPU_FEATURE_NEON) != 0;
default:
return 0;
}
Expand Down
1 change: 1 addition & 0 deletions sljit_src/sljitNativeARM_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif

case SLJIT_HAS_SIMD:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
case SLJIT_HAS_ROT:
Expand Down
18 changes: 9 additions & 9 deletions sljit_src/sljitNativeX86_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
built-in CPU features. Therefore they can be overwritten by different threads
if they detect the CPU features in the same time. */
#define CPU_FEATURE_DETECTED 0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
#define CPU_FEATURE_SSE2 0x002
#endif
#define CPU_FEATURE_LZCNT 0x004
Expand Down Expand Up @@ -444,7 +444,7 @@ static void get_cpu_features(void)

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (value & 0x4000000)
feature_list |= CPU_FEATURE_SSE2;
#endif
Expand Down Expand Up @@ -738,13 +738,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#elif (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (cpu_feature_list == 0)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
#else /* !SLJIT_DETECT_SIMD */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
#endif /* SLJIT_IS_FPU_AVAILABLE */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
case SLJIT_HAS_VIRTUAL_REGISTERS:
Expand Down Expand Up @@ -772,14 +772,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_PREFETCH:
return 1;

case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
case SLJIT_HAS_SIMD:
#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (cpu_feature_list == 0)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* !SLJIT_DETECT_SSE2 */
#else /* !SLJIT_DETECT_SIMD */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
#endif /* SLJIT_DETECT_SIMD */

default:
return 0;
Expand Down
21 changes: 19 additions & 2 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -10636,10 +10636,18 @@ static void test85(void)

int sljit_test(int argc, char* argv[])
{
sljit_s32 has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
int fpu;
int simd = 0;
char features[24];
int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
verbose = has_arg && argv[1][1] == 'v';
silent = has_arg && argv[1][1] == 's';

#if (defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \
|| (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
simd = sljit_has_cpu_feature(SLJIT_HAS_SIMD);
#endif

if (!verbose && !silent)
printf("Pass -v to enable verbose, -s to disable this hint.\n\n");

Expand Down Expand Up @@ -10743,7 +10751,16 @@ int sljit_test(int argc, char* argv[])
printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
else
printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are " COLOR_RED "FAILED" COLOR_DEFAULT " ", TEST_COUNT - successful_tests, (TEST_COUNT - successful_tests) * 100 / TEST_COUNT);
printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), sljit_has_cpu_feature(SLJIT_HAS_FPU) ? " (with fpu)" : " (without fpu)");

fpu = sljit_has_cpu_feature(SLJIT_HAS_FPU);
if (simd && fpu)
strcpy(features, " (with: fpu, simd)");
else if (fpu)
strcpy(features, " (with fpu)");
else
strcpy(features, " (without fpu)");

printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), features);

return TEST_COUNT - successful_tests;

Expand Down

0 comments on commit cf0effb

Please sign in to comment.