ggml: Added run-time detection of neon, i8mm and sve
Adds run-time detection of the Arm instruction set features
neon, i8mm and sve for Linux and Apple build targets.
eddnjjn committed Sep 17, 2024
1 parent d54c21d commit aab436c
Showing 5 changed files with 103 additions and 34 deletions.
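
In outline, the patch replaces per-feature compile-time #ifdef checks with a small record of detected features that is filled in once when ggml_init() runs and then read through the existing ggml_cpu_has_*() accessors plus the new ggml_cpu_get_sve_cnt(). A condensed sketch of that shape (field names follow the diff below; the comments are ours):

    // Cached per-process view of the Arm features (aarch64 builds only):
    //   - filled once from the OS: getauxval() on Linux, sysctlbyname() on Apple,
    //   - read afterwards by ggml_cpu_has_neon/sve/matmul_int8() and ggml_cpu_get_sve_cnt().
    struct ggml_aarch64_features_type {
        int has_neon;
        int has_i8mm;
        int has_sve;
        int sve_cnt;   // SVE vector length in bytes
    };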
3 changes: 3 additions & 0 deletions ggml/include/ggml.h
@@ -2479,6 +2479,9 @@ extern "C" {
GGML_API int ggml_cpu_has_cann (void);
GGML_API int ggml_cpu_has_llamafile (void);

+ // get the sve vector length in bytes
+ GGML_API int ggml_cpu_get_sve_cnt(void);
+
//
// Internal types and functions exposed for tests and benchmarks
//
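For context, a minimal sketch of how a caller might query the run-time checks together with the new accessor declared above; the init parameters and printf formatting are illustrative, only the ggml_* calls come from ggml.h:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // ggml_init() is what triggers the run-time feature detection on aarch64.
        struct ggml_init_params params = { .mem_size = 16*1024, .mem_buffer = NULL, .no_alloc = false };
        struct ggml_context * ctx = ggml_init(params);

        printf("neon: %d\n", ggml_cpu_has_neon());
        printf("i8mm: %d\n", ggml_cpu_has_matmul_int8());
        printf("sve : %d\n", ggml_cpu_has_sve());
        printf("sve vector length: %d bytes\n", ggml_cpu_get_sve_cnt());

        ggml_free(ctx);
        return 0;
    }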
28 changes: 14 additions & 14 deletions ggml/src/ggml-aarch64.c
@@ -546,8 +546,8 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);

#if defined(__ARM_FEATURE_SVE)
- if (ggml_sve_cnt_b == QK8_0) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
+ if (ggml_cpu_get_sve_cnt() == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
@@ -658,8 +658,8 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);

#if defined(__ARM_FEATURE_SVE)
- if (ggml_sve_cnt_b == QK8_0) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
+ if (ggml_cpu_get_sve_cnt() == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
@@ -776,7 +776,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);

#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
- if (ggml_sve_cnt_b == QK8_0) {
+ if (ggml_cpu_get_sve_cnt() == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
@@ -842,12 +842,12 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
return;
}
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
- GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
+ GGML_ASSERT((ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) &&
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
"performance");
}
else if (ggml_cpu_has_neon()) {
- GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
+ GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
"quantization format for optimal performance");
}
@@ -997,8 +997,8 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);

#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
- if (ggml_sve_cnt_b == QK8_0) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
+ if (ggml_cpu_get_sve_cnt() == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
@@ -1518,8 +1518,8 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);

#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
- if (ggml_sve_cnt_b == QK8_0) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
+ if (ggml_cpu_get_sve_cnt() == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
@@ -1980,7 +1980,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);

#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
- if (ggml_sve_cnt_b == QK8_0) {
+ if (ggml_cpu_get_sve_cnt() == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
@@ -2391,12 +2391,12 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
return;
}
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
- GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
+ GGML_ASSERT((ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) &&
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
"performance");
}
else if (ggml_cpu_has_neon()) {
- GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
+ GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_cpu_get_sve_cnt() == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
"quantization format for optimal performance");
}
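The assertions above encode a preference order among the repacked Q4_0 formats: Q4_0_8_8 when 256-bit SVE is available (an SVE vector length of QK8_0 = 32 bytes), Q4_0_4_8 when NEON plus i8mm is available, and Q4_0_4_4 with plain NEON. A hedged sketch of that policy as a standalone helper (the function name and returned strings are illustrative, not part of the patch):

    #include "ggml.h"

    // Illustrative only: choose the repacked Q4_0 layout the asserts recommend.
    static const char * pick_q4_0_layout(void) {
        if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == 32) {  // 256-bit SVE vectors
            return "Q4_0_8_8";
        }
        if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {   // NEON + int8 matmul
            return "Q4_0_4_8";
        }
        if (ggml_cpu_has_neon()) {                                 // plain NEON
            return "Q4_0_4_4";
        }
        return "Q4_0";                                             // generic fallback
    }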
4 changes: 2 additions & 2 deletions ggml/src/ggml-quants.c
@@ -4012,7 +4012,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);

- const int vector_length = ggml_sve_cnt_b*8;
+ const int vector_length = ggml_cpu_get_sve_cnt()*8;

// VLA Implementation using switch case
switch (vector_length) {
@@ -5596,7 +5596,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);

- const int vector_length = ggml_sve_cnt_b*8;
+ const int vector_length = ggml_cpu_get_sve_cnt()*8;

//VLA Implemenation for SVE
switch (vector_length) {
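ggml_cpu_get_sve_cnt() reports the SVE vector length in bytes, so multiplying by 8 gives the width in bits that the switch shown above dispatches on. A small sketch of the same pattern (the case values are assumed to mirror the VLA kernels in this file):

    #include "ggml.h"

    // Sketch: dispatch on the run-time SVE vector width; ggml_cpu_get_sve_cnt() is in bytes.
    static void dispatch_by_sve_width(void) {
        const int vector_length = ggml_cpu_get_sve_cnt() * 8;   // bytes -> bits
        switch (vector_length) {
            case 128: /* 128-bit SVE kernel */ break;
            case 256: /* 256-bit SVE kernel */ break;
            case 512: /* 512-bit SVE kernel */ break;
            default:  /* NEON or scalar fallback */ break;
        }
    }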
4 changes: 0 additions & 4 deletions ggml/src/ggml-quants.h
@@ -142,10 +142,6 @@ void iq2xs_free_impl(enum ggml_type type);
void iq3xs_init_impl(int grid_size);
void iq3xs_free_impl(int grid_size);

- #if defined(__ARM_FEATURE_SVE)
- extern int ggml_sve_cnt_b;
- #endif
-
#ifdef __cplusplus
}
#endif
98 changes: 84 additions & 14 deletions ggml/src/ggml.c
@@ -37,9 +37,15 @@
#include <unistd.h>
#endif

- #if defined(__ARM_FEATURE_SVE)
- int ggml_sve_cnt_b = 0;
+ #if defined(__aarch64__)
+ struct ggml_aarch64_features_type {
+ int has_neon;
+ int has_i8mm;
+ int has_sve;
+ int sve_cnt;
+ } ggml_aarch64_features = {-1, -1, -1, 0};
#endif

#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#undef GGML_USE_LLAMAFILE
#endif
@@ -3643,6 +3649,65 @@ static inline int ggml_up(int n, int m) {

////////////////////////////////////////////////////////////////////////////////

+ #if defined(__aarch64__)
+
+ #if defined(__linux__)
+ #include <sys/auxv.h>
+ #elif defined(__APPLE__)
+ #include <sys/sysctl.h>
+ #endif
+
+ static void ggml_init_aarch64_features(void) {
+ #if defined(__linux__)
+ uint32_t hwcap = getauxval(AT_HWCAP);
+ uint32_t hwcap2 = getauxval(AT_HWCAP2);
+
+ ggml_aarch64_features.has_neon = !!(hwcap & HWCAP_ASIMD);
+ ggml_aarch64_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
+ ggml_aarch64_features.has_sve = !!(hwcap & HWCAP_SVE);
+ #if defined(__ARM_FEATURE_SVE)
+ ggml_aarch64_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
+ #endif
+ #elif defined(__APPLE__)
+ int oldp = 0;
+ size_t size = sizeof(oldp);
+ if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
+ oldp = 0;
+ }
+ ggml_aarch64_features.has_neon = oldp;
+
+ if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
+ oldp = 0;
+ }
+ ggml_aarch64_features.has_i8mm = oldp;
+
+ ggml_aarch64_features.has_sve = 0;
+ ggml_aarch64_features.sve_cnt = 0;
+ #else
+ // Run-time CPU feature detection not implemented for this platform, fallback to compile time
+ #if defined(__ARM_NEON)
+ ggml_aarch64_features.has_neon = 1;
+ #else
+ ggml_aarch64_features.has_neon = 0;
+ #endif
+
+ #if defined(__ARM_FEATURE_MATMUL_INT8)
+ ggml_aarch64_features.has_i8mm = 1;
+ #else
+ ggml_aarch64_features.has_i8mm = 0;
+ #endif
+
+ #if defined(__ARM_FEATURE_SVE)
+ ggml_aarch64_features.has_sve = 1;
+ ggml_aarch64_features.sve_cnt = 16;
+ #else
+ ggml_aarch64_features.has_sve = 0;
+ ggml_aarch64_features.sve_cnt = 0;
+ #endif
+ #endif
+ }
+ #endif
+
struct ggml_context * ggml_init(struct ggml_init_params params) {
// make this function thread safe
ggml_critical_section_start();
@@ -3693,6 +3758,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
}

+ #if defined(__aarch64__)
+ ggml_init_aarch64_features();
+ #endif
+
is_first_call = false;
}

@@ -3741,12 +3810,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

GGML_ASSERT_ALIGNED(ctx->mem_buffer);

- #if defined(__ARM_FEATURE_SVE)
- if (!ggml_sve_cnt_b) {
- ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
- }
- #endif
-
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);

ggml_critical_section_end();
@@ -23265,16 +23328,16 @@ int ggml_cpu_has_fma(void) {
}

int ggml_cpu_has_neon(void) {
- #if defined(__ARM_NEON)
- return 1;
+ #if defined(__aarch64__)
+ return ggml_aarch64_features.has_neon;
#else
return 0;
#endif
}

int ggml_cpu_has_sve(void) {
- #if defined(__ARM_FEATURE_SVE)
- return 1;
+ #if defined(__aarch64__)
+ return ggml_aarch64_features.has_sve;
#else
return 0;
#endif
@@ -23421,11 +23484,18 @@ int ggml_cpu_has_vsx(void) {
}

int ggml_cpu_has_matmul_int8(void) {
- #if defined(__ARM_FEATURE_MATMUL_INT8)
- return 1;
+ #if defined(__aarch64__)
+ return ggml_aarch64_features.has_i8mm;
#else
return 0;
#endif
}

+ int ggml_cpu_get_sve_cnt(void) {
+ #if defined(__aarch64__)
+ return ggml_aarch64_features.sve_cnt;
+ #else
+ return 0;
+ #endif
+ }
////////////////////////////////////////////////////////////////////////////////
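
The detection calls used in ggml_init_aarch64_features() can be exercised on their own; below is a minimal, hedged probe for Linux and Apple aarch64 hosts. It assumes the same HWCAP bits, sysctl names and prctl query as the patch, and that <sys/auxv.h> and <sys/prctl.h> expose those constants on the build host:

    #include <stdio.h>

    #if defined(__aarch64__) && defined(__linux__)
    #include <sys/auxv.h>   // getauxval, AT_HWCAP, AT_HWCAP2, HWCAP_* constants
    #include <sys/prctl.h>  // prctl, PR_SVE_GET_VL, PR_SVE_VL_LEN_MASK
    #elif defined(__aarch64__) && defined(__APPLE__)
    #include <sys/sysctl.h> // sysctlbyname
    #endif

    int main(void) {
    #if defined(__aarch64__) && defined(__linux__)
        unsigned long hwcap  = getauxval(AT_HWCAP);
        unsigned long hwcap2 = getauxval(AT_HWCAP2);
        printf("neon: %d\n", !!(hwcap  & HWCAP_ASIMD));
        printf("i8mm: %d\n", !!(hwcap2 & HWCAP2_I8MM));
        printf("sve : %d\n", !!(hwcap  & HWCAP_SVE));
        if (hwcap & HWCAP_SVE) {
            printf("sve vector length: %d bytes\n", (int)(prctl(PR_SVE_GET_VL) & PR_SVE_VL_LEN_MASK));
        }
    #elif defined(__aarch64__) && defined(__APPLE__)
        int val = 0; size_t size = sizeof(val);
        if (sysctlbyname("hw.optional.AdvSIMD", &val, &size, NULL, 0) != 0) { val = 0; }
        printf("neon: %d\n", val);
        val = 0; size = sizeof(val);
        if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &val, &size, NULL, 0) != 0) { val = 0; }
        printf("i8mm: %d\n", val);
        printf("sve : 0 (not reported on Apple silicon)\n");
    #else
        printf("not an aarch64 Linux/Apple target\n");
    #endif
        return 0;
    }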
