Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arm: add support for detecting NEON (WIP) #144

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions API_CHANGES
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
This file is the short summary of the API changes:

10.12.2022 - Non-backward compatible
SLJIT_HAS_SSE2 renamed to SLJIT_HAS_SIMD

10.11.2022 - Non-backward compatible
Extract the pre/post update operations from
sljit_emit_mem to sljit_emit_mem_update
Expand Down
10 changes: 6 additions & 4 deletions sljit_src/sljitConfigInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,12 @@ typedef double sljit_f64;

#endif /* !SLJIT_FPU_UNALIGNED */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Auto detect SSE2 support using CPUID.
On 64 bit x86 cpus, sse2 must be present. */
#define SLJIT_DETECT_SSE2 1
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || \
(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Auto detect availability of SSE2 (using CPUID) or NEON.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth doing the checks here to avoid adding the "detect" support for cases that are known not to require it (e.g. OpenBSD or macOS).

Mandatory on 64 bit x86 or ARM (aarch64) cpus.
Optionally available for 32 bit since pentium or armv7. */
#define SLJIT_DETECT_SIMD 1
#endif

/*****************************************************************************************/
Expand Down
6 changes: 4 additions & 2 deletions sljit_src/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -632,9 +632,11 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
/* [Emulated] Prefetch instruction is available (emulated as a nop). */
#define SLJIT_HAS_PREFETCH 7

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
/* [Not emulated] SSE2 support is available on x86. */
#define SLJIT_HAS_SSE2 100
/* [Not emulated] NEON support is available on ARM. */
#define SLJIT_HAS_SIMD 100
#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
Expand Down
70 changes: 70 additions & 0 deletions sljit_src/sljitNativeARM_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,69 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
#endif
}

#define CPU_FEATURE_NEON (1UL << 12)
static unsigned long cpu_feature_list;

#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
#include <sys/auxv.h>
carenas marked this conversation as resolved.
Show resolved Hide resolved
#endif

#ifdef __NetBSD__
carenas marked this conversation as resolved.
Show resolved Hide resolved
#include <sys/param.h>
#include <sys/sysctl.h>
#endif

static void get_cpu_features(void)
{
if (cpu_feature_list)
return;

#if defined(__ARM_ARCH) && __ARM_ARCH == 8
/* TODO: confirm if optional with armv9 */
Copy link
Contributor Author

@carenas carenas Dec 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If true (because NEON is being "replaced" by SVE2), then this might also apply to the 64-bit version, and therefore some of it might need to move to a "common" file.

/* mandatory for armv8 */
cpu_feature_list = CPU_FEATURE_NEON;
#elif defined(HAVE_GETAUXVAL)
cpu_feature_list = getauxval(AT_HWCAP);
if (errno == ENOENT)
cpu_feature_list = 1;
#elif defined(__OpenBSD__)
/* required feature */
cpu_feature_list = CPU_FEATURE_NEON;
#elif defined(__APPLE__) && defined (__ARM_NEON__)
cpu_feature_list = CPU_FEATURE_NEON;
#elif defined(_WIN32)
#ifndef FP_ARM_NEON_INSTRUCTIONS_AVAILABLE
#define FP_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#endif
if (IsProcessorFeaturePresent(FP_ARM_NEON_INSTRUCTIONS_AVAILABLE))
cpu_feature_list = CPU_FEATURE_NEON;
#elif defined(__FreeBSD__) && defined(HAVE_ELF_AUX_INFO)
unsigned long buf;

if (elf_aux_info(AT_HWCAP, (void *)&buf, (int)sizeof(buf)))
cpu_feature_list = 1;
return;
}

if (buf & CPU_FEATURE_NEON)
cpu_feature_list = buf;
#elif defined(__NetBSD__) || defined(__FreeBSD__)
int neon;
size_t len = sizeof(int);

if (sysctlbyname("machdep.neon_present", &neon, &len, NULL, 0) < 0) {
cpu_feature_list = 1;
return;
}

if (neon)
cpu_feature_list = CPU_FEATURE_NEON;
#endif
}

#endif /* SLJIT_DETECT_SIMD */

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
Expand Down Expand Up @@ -973,6 +1036,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 2;
#endif

case SLJIT_HAS_SIMD:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) && \
(defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (!cpu_feature_list)
get_cpu_features();
#endif /* SLJIT_CONFIG_ARM_V7 && SLJIT_DETECT_SIMD */
return (cpu_feature_list & CPU_FEATURE_NEON) != 0;
default:
return 0;
}
Expand Down
1 change: 1 addition & 0 deletions sljit_src/sljitNativeARM_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif

case SLJIT_HAS_SIMD:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
case SLJIT_HAS_ROT:
Expand Down
18 changes: 9 additions & 9 deletions sljit_src/sljitNativeX86_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
built-in CPU features. Therefore they can be overwritten by different threads
if they detect the CPU features at the same time. */
#define CPU_FEATURE_DETECTED 0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
#define CPU_FEATURE_SSE2 0x002
#endif
#define CPU_FEATURE_LZCNT 0x004
Expand Down Expand Up @@ -444,7 +444,7 @@ static void get_cpu_features(void)

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (value & 0x4000000)
feature_list |= CPU_FEATURE_SSE2;
#endif
Expand Down Expand Up @@ -738,13 +738,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#elif (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (cpu_feature_list == 0)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
#else /* !SLJIT_DETECT_SIMD */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
#endif /* SLJIT_IS_FPU_AVAILABLE */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
case SLJIT_HAS_VIRTUAL_REGISTERS:
Expand Down Expand Up @@ -772,14 +772,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_PREFETCH:
return 1;

case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
case SLJIT_HAS_SIMD:
#if (defined SLJIT_DETECT_SIMD && SLJIT_DETECT_SIMD)
if (cpu_feature_list == 0)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* !SLJIT_DETECT_SSE2 */
#else /* !SLJIT_DETECT_SIMD */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
#endif /* SLJIT_DETECT_SIMD */

default:
return 0;
Expand Down
21 changes: 19 additions & 2 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -10636,10 +10636,18 @@ static void test85(void)

int sljit_test(int argc, char* argv[])
{
sljit_s32 has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
int fpu;
int simd = 0;
char features[24];
int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
verbose = has_arg && argv[1][1] == 'v';
silent = has_arg && argv[1][1] == 's';

#if (defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \
|| (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
simd = sljit_has_cpu_feature(SLJIT_HAS_SIMD);
#endif

if (!verbose && !silent)
printf("Pass -v to enable verbose, -s to disable this hint.\n\n");

Expand Down Expand Up @@ -10743,7 +10751,16 @@ int sljit_test(int argc, char* argv[])
printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
else
printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are " COLOR_RED "FAILED" COLOR_DEFAULT " ", TEST_COUNT - successful_tests, (TEST_COUNT - successful_tests) * 100 / TEST_COUNT);
printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), sljit_has_cpu_feature(SLJIT_HAS_FPU) ? " (with fpu)" : " (without fpu)");

fpu = sljit_has_cpu_feature(SLJIT_HAS_FPU);
if (simd && fpu)
strcpy(features, " (with: fpu, simd)");
else if (fpu)
strcpy(features, " (with fpu)");
else
strcpy(features, " (without fpu)");
Comment on lines +10756 to +10761
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might misrepresent the case where (!fpu && simd), but that is by design, as that combination (while possible) is not something that is available on the market yet.

SLJIT_IS_FPU_AVAILABLE and its use cases are also missing and worth discussing, but IMHO that might be able to wait until SIMD support is added to sljit proper.


printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), features);

return TEST_COUNT - successful_tests;

Expand Down