Skip to content

Commit

Permalink
Implement some vec instructions with NEON
Browse files Browse the repository at this point in the history
  • Loading branch information
nimelehin committed Dec 13, 2020
1 parent dccc553 commit 89dfac7
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions emu/vec.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif
#include <math.h>
#include <string.h>

Expand Down Expand Up @@ -91,8 +94,15 @@ void vec_shiftr_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst) {
}

// Lane-wise byte addition of two 128-bit vector registers.
// Each u8 lane of dst is replaced with (dst lane + src lane); src is only read.
void vec_add_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
#ifdef __ARM_NEON__
    // NOTE(review): __ARM_NEON__ is the legacy spelling; ACLE specifies
    // __ARM_NEON. Confirm every targeted toolchain defines the legacy name,
    // otherwise this fast path is silently skipped.
    //
    // NEON path: pull both operands into 16 x u8 vectors, add them with the
    // non-saturating vaddq_u8, and write the sum back over dst.
    const uint8x16_t lhs = vld1q_u8(dst->u8);
    const uint8x16_t rhs = vld1q_u8(src->u8);
    vst1q_u8(dst->u8, vaddq_u8(lhs, rhs));
#else
    // Portable path: the same addition, one lane per iteration.
    for (unsigned lane = 0; lane < array_size(src->u8); lane++) {
        dst->u8[lane] = dst->u8[lane] + src->u8[lane];
    }
#endif
}
void vec_add_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u32); i++)
Expand Down Expand Up @@ -142,9 +152,16 @@ void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
}

// Lane-wise minimum over the byte lanes of two 128-bit vector registers.
// After the call each u8 lane of dst holds the smaller of its previous value
// and the corresponding src lane; src is only read.
void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
#ifdef __ARM_NEON__
    // NEON path: vminq_u8 selects the unsigned minimum of each of the 16 lanes.
    const uint8x16_t current = vld1q_u8(dst->u8);
    const uint8x16_t candidate = vld1q_u8(src->u8);
    vst1q_u8(dst->u8, vminq_u8(current, candidate));
#else
    // Portable path: overwrite a dst lane only when src is strictly smaller.
    for (unsigned lane = 0; lane < array_size(src->u8); lane++) {
        if (dst->u8[lane] > src->u8[lane]) {
            dst->u8[lane] = src->u8[lane];
        }
    }
#endif
}

static bool cmpd(double a, double b, int type) {
Expand Down Expand Up @@ -253,8 +270,15 @@ void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint
}

// Lane-wise byte equality test between two 128-bit vector registers.
// Each u8 lane of dst becomes an all-ones mask when it equalled the matching
// src lane, and zero otherwise; src is only read.
void vec_compare_eqb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
#ifdef __ARM_NEON__
    // NEON path: vceqq_u8 yields an all-ones lane on equality and a zero lane
    // otherwise, for all 16 lanes at once.
    const uint8x16_t lhs = vld1q_u8(dst->u8);
    const uint8x16_t rhs = vld1q_u8(src->u8);
    vst1q_u8(dst->u8, vceqq_u8(lhs, rhs));
#else
    // Portable path: build the same mask one lane at a time.
    for (unsigned lane = 0; lane < array_size(src->u8); lane++) {
        if (dst->u8[lane] == src->u8[lane])
            dst->u8[lane] = ~0;
        else
            dst->u8[lane] = 0;
    }
#endif
}
void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u32); i++)
Expand Down

0 comments on commit 89dfac7

Please sign in to comment.