From cf0effb520432d4d5d3ee1509b3bcb851e3f1fe7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Thu, 8 Dec 2022 17:09:37 -0800 Subject: [PATCH] arm: add support for detecting SIMD (NEON) availability (WIP) Requires getauxval() which is available at least in Linux/Android with recent versions of the libc and that is therefore behind a configure like macro. A similar function from FreeBSD>=12, Windows and NetBSD is used for each case. While at it, consolidate the code to use the same externally visible flag that is used in x86 for SSE2. --- API_CHANGES | 3 ++ sljit_src/sljitConfigInternal.h | 10 +++-- sljit_src/sljitLir.h | 6 ++- sljit_src/sljitNativeARM_32.c | 70 +++++++++++++++++++++++++++++++ sljit_src/sljitNativeARM_64.c | 1 + sljit_src/sljitNativeX86_common.c | 18 ++++---- test_src/sljitTest.c | 21 +++++++++- 7 files changed, 112 insertions(+), 17 deletions(-) diff --git a/API_CHANGES b/API_CHANGES index 55f05130..880023aa 100644 --- a/API_CHANGES +++ b/API_CHANGES @@ -1,5 +1,8 @@ This file is the short summary of the API changes: +10.12.2022 - Non-backward compatible + SLJIT_HAS_SSE2 renamed to SLJIT_HAS_SIMD + 10.11.2022 - Non-backward compatible Extract the pre/post update operations from sljit_emit_mem to sljit_emit_mem_update diff --git a/sljit_src/sljitConfigInternal.h b/sljit_src/sljitConfigInternal.h index b4c06e17..7c0a47c3 100644 --- a/sljit_src/sljitConfigInternal.h +++ b/sljit_src/sljitConfigInternal.h @@ -580,10 +580,12 @@ typedef double sljit_f64; #endif /* !SLJIT_FPU_UNALIGNED */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -/* Auto detect SSE2 support using CPUID. - On 64 bit x86 cpus, sse2 must be present. */ -#define SLJIT_DETECT_SSE2 1 +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || \ + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +/* Auto detect availability of SSE2 (using CPUID) or NEON. + Mandatory on 64 bit x86 or ARM (aarch64) cpus. 
+   Optionally available for 32 bit since pentium or armv7. */
+#define SLJIT_DETECT_SIMD 1
 #endif
 
 /*****************************************************************************************/
diff --git a/sljit_src/sljitLir.h b/sljit_src/sljitLir.h
index e2db2149..df6299b0 100644
--- a/sljit_src/sljitLir.h
+++ b/sljit_src/sljitLir.h
@@ -632,9 +632,11 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
 /* [Emulated] Prefetch instruction is available (emulated as a nop). */
 #define SLJIT_HAS_PREFETCH 7
 
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+	|| (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
 /* [Not emulated] SSE2 support is available on x86. */
-#define SLJIT_HAS_SSE2 100
+/* [Not emulated] NEON support is available on ARM. */
+#define SLJIT_HAS_SIMD 100
 #endif
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
diff --git a/sljit_src/sljitNativeARM_32.c b/sljit_src/sljitNativeARM_32.c
index 54b8ade0..183ce9d0 100644
--- a/sljit_src/sljitNativeARM_32.c
+++ b/sljit_src/sljitNativeARM_32.c
@@ -41,6 +41,69 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
 #endif
 }
 
+#define CPU_FEATURE_NEON (1UL << 12)
+static unsigned long cpu_feature_list; /* zero until a probe stores a result */
+
+#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
+#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
+#include <sys/auxv.h>
+#endif
+
+#if defined(__NetBSD__) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#endif
+/* Probe NEON support once; the result (or 1 when the probe fails) is cached in cpu_feature_list. */
+static void get_cpu_features(void)
+{
+	if (cpu_feature_list)
+		return;
+
+#if defined(__ARM_ARCH) && __ARM_ARCH == 8
+	/* TODO: confirm if optional with armv9 */
+	/* mandatory for armv8 */
+	cpu_feature_list = CPU_FEATURE_NEON;
+#elif defined(HAVE_GETAUXVAL)
+	cpu_feature_list = getauxval(AT_HWCAP);
+	if (!cpu_feature_list) /* 0: lookup failed or no hwcaps; errno may be stale, so it is not consulted */
+		cpu_feature_list = 1;
+#elif defined(__OpenBSD__)
+	/* required feature */
+	cpu_feature_list = CPU_FEATURE_NEON;
+#elif defined(__APPLE__) && defined (__ARM_NEON__)
+	cpu_feature_list = CPU_FEATURE_NEON;
+#elif defined(_WIN32)
+#ifndef FP_ARM_NEON_INSTRUCTIONS_AVAILABLE
+#define FP_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
+#endif
+	if (IsProcessorFeaturePresent(FP_ARM_NEON_INSTRUCTIONS_AVAILABLE))
+		cpu_feature_list = CPU_FEATURE_NEON;
+#elif defined(__FreeBSD__) && defined(HAVE_ELF_AUX_INFO)
+	unsigned long buf;
+
+	if (elf_aux_info(AT_HWCAP, (void *)&buf, (int)sizeof(buf))) { /* non-zero return: lookup failed */
+		cpu_feature_list = 1;
+		return;
+	}
+
+	if (buf & CPU_FEATURE_NEON)
+		cpu_feature_list = buf;
+#elif defined(__NetBSD__) || defined(__FreeBSD__)
+	int neon;
+	size_t len = sizeof(int);
+
+	if (sysctlbyname("machdep.neon_present", &neon, &len, NULL, 0) < 0) {
+		cpu_feature_list = 1;
+		return;
+	}
+
+	if (neon)
+		cpu_feature_list = CPU_FEATURE_NEON;
+#endif
+}
+
+#endif /* SLJIT_DETECT_SIMD */
+
 /* Last register + 1. */
 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
@@ -973,6 +1036,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 		return 2;
 #endif
 
+	case SLJIT_HAS_SIMD:
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) && \
+	(defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
+		if (!cpu_feature_list)
+			get_cpu_features();
+#endif /* SLJIT_CONFIG_ARM_V7 && SLJIT_DETECT_SIMD */
+		return (cpu_feature_list & CPU_FEATURE_NEON) != 0;
 	default:
 		return 0;
 	}
diff --git a/sljit_src/sljitNativeARM_64.c b/sljit_src/sljitNativeARM_64.c
index 89f747e7..2fa5e7a2 100644
--- a/sljit_src/sljitNativeARM_64.c
+++ b/sljit_src/sljitNativeARM_64.c
@@ -392,6 +392,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 		return 1;
 #endif
 
+	case SLJIT_HAS_SIMD:
 	case SLJIT_HAS_CLZ:
 	case SLJIT_HAS_CTZ:
 	case SLJIT_HAS_ROT:
diff --git a/sljit_src/sljitNativeX86_common.c b/sljit_src/sljitNativeX86_common.c
index 2d6f4a81..801215c9 100644
--- a/sljit_src/sljitNativeX86_common.c
+++ b/sljit_src/sljitNativeX86_common.c
@@ -294,7 +294,7 @@ static const sljit_u8
freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { built-in CPU features. Therefore they can be overwritten by different threads if they detect the CPU features in the same time. */ #define CPU_FEATURE_DETECTED 0x001 -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD) #define CPU_FEATURE_SSE2 0x002 #endif #define CPU_FEATURE_LZCNT 0x004 @@ -444,7 +444,7 @@ static void get_cpu_features(void) #endif /* _MSC_VER && _MSC_VER >= 1400 */ -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD) if (value & 0x4000000) feature_list |= CPU_FEATURE_SSE2; #endif @@ -738,13 +738,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_FPU: #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; -#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#elif (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD) if (cpu_feature_list == 0) get_cpu_features(); return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; -#else /* SLJIT_DETECT_SSE2 */ +#else /* !SLJIT_DETECT_SIMD */ return 1; -#endif /* SLJIT_DETECT_SSE2 */ +#endif /* SLJIT_IS_FPU_AVAILABLE */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) case SLJIT_HAS_VIRTUAL_REGISTERS: @@ -772,14 +772,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_PREFETCH: return 1; - case SLJIT_HAS_SSE2: -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + case SLJIT_HAS_SIMD: +#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD) if (cpu_feature_list == 0) get_cpu_features(); return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; -#else /* !SLJIT_DETECT_SSE2 */ +#else /* !SLJIT_DETECT_SIMD */ return 1; -#endif /* SLJIT_DETECT_SSE2 */ +#endif /* SLJIT_DETECT_SIMD */ default: return 0; diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index adf6a4b0..64e50db8 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -10636,10 
+10636,18 @@ static void test85(void) int sljit_test(int argc, char* argv[]) { - sljit_s32 has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); + int fpu; + int simd = 0; + char features[24]; + int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); verbose = has_arg && argv[1][1] == 'v'; silent = has_arg && argv[1][1] == 's'; +#if (defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \ + || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + simd = sljit_has_cpu_feature(SLJIT_HAS_SIMD); +#endif + if (!verbose && !silent) printf("Pass -v to enable verbose, -s to disable this hint.\n\n"); @@ -10743,7 +10751,16 @@ int sljit_test(int argc, char* argv[]) printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); else printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are " COLOR_RED "FAILED" COLOR_DEFAULT " ", TEST_COUNT - successful_tests, (TEST_COUNT - successful_tests) * 100 / TEST_COUNT); - printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), sljit_has_cpu_feature(SLJIT_HAS_FPU) ? " (with fpu)" : " (without fpu)"); + + fpu = sljit_has_cpu_feature(SLJIT_HAS_FPU); + if (simd && fpu) + strcpy(features, " (with: fpu, simd)"); + else if (fpu) + strcpy(features, " (with fpu)"); + else + strcpy(features, " (without fpu)"); + + printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), features); return TEST_COUNT - successful_tests;