Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

target/ppc: Drop VECTOR_FOR_INORDER_I #95

Open
wants to merge 6 commits into
base: ferst-tcg-config_128
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions target/ppc/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,9 @@ DEF_HELPER_FLAGS_1(vclzlsbb, TCG_CALL_NO_RWG, tl, avr)
DEF_HELPER_FLAGS_1(vctzlsbb, TCG_CALL_NO_RWG, tl, avr)
DEF_HELPER_FLAGS_3(vbpermd, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vbpermq, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vpmsumb, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vpmsumh, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vpmsumw, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VPMSUMB, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VPMSUMH, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VPMSUMW, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VPMSUMD, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_2(vextublx, TCG_CALL_NO_RWG, tl, tl, avr)
DEF_HELPER_FLAGS_2(vextuhlx, TCG_CALL_NO_RWG, tl, tl, avr)
Expand Down
3 changes: 3 additions & 0 deletions target/ppc/insn32.decode
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,9 @@ DSCRIQ 111111 ..... ..... ...... 001100010 . @Z22_tap_sh_rc

## Vector Exclusive-OR-based Instructions

VPMSUMB 000100 ..... ..... ..... 10000001000 @VX
VPMSUMH 000100 ..... ..... ..... 10001001000 @VX
VPMSUMW 000100 ..... ..... ..... 10010001000 @VX
VPMSUMD 000100 ..... ..... ..... 10011001000 @VX

## Vector Integer Instructions
Expand Down
72 changes: 28 additions & 44 deletions target/ppc/int_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
prod[i] = (int32_t)a->s8[i] * b->u8[i];
}

VECTOR_FOR_INORDER_I(i, s32) {
for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
prod[4 * i + 2] + prod[4 * i + 3];
}
Expand All @@ -1029,7 +1029,7 @@ void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
prod[i] = a->s16[i] * b->s16[i];
}

VECTOR_FOR_INORDER_I(i, s32) {
for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
}
}
Expand All @@ -1045,7 +1045,7 @@ void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
prod[i] = (int32_t)a->s16[i] * b->s16[i];
}

VECTOR_FOR_INORDER_I(i, s32) {
for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

r->u32[i] = cvtsdsw(t, &sat);
Expand All @@ -1065,7 +1065,7 @@ void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
prod[i] = a->u8[i] * b->u8[i];
}

VECTOR_FOR_INORDER_I(i, u32) {
for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
prod[4 * i + 2] + prod[4 * i + 3];
}
Expand All @@ -1080,7 +1080,7 @@ void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
prod[i] = a->u16[i] * b->u16[i];
}

VECTOR_FOR_INORDER_I(i, u32) {
for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
}
}
Expand All @@ -1096,7 +1096,7 @@ void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
prod[i] = a->u16[i] * b->u16[i];
}

VECTOR_FOR_INORDER_I(i, s32) {
for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

r->u32[i] = cvtuduw(t, &sat);
Expand Down Expand Up @@ -1303,28 +1303,15 @@ XXGENPCV(XXGENPCVDM, 8)
#undef XXGENPCV_LE_COMP
#undef XXGENPCV

/*
 * Host-endianness-dependent accessors used by the vbpermd/vbpermq helpers
 * that follow.  The first branch is selected when HOST_BIG_ENDIAN is
 * non-zero; the second branch mirrors the element indices.
 *
 *   VBPERMQ_INDEX(avr, i)       - read one byte of avr->u8: element i in the
 *                                 first branch, element 15 - i in the second.
 *   VBPERMD_INDEX(i)            - index into a two-element u64 array: i in
 *                                 the first branch, 1 - i in the second.
 *   VBPERMQ_DW(index)           - 0/1 selector derived from bit 0x40 of
 *                                 index: 1 when the bit is set in the first
 *                                 branch, 1 when it is clear in the second.
 *   EXTRACT_BIT(avr, i, index)  - a single bit taken from one avr->u64
 *                                 element via extract64().
 *
 * NOTE(review): the two EXTRACT_BIT variants differ in the bit position
 * passed to extract64() (index vs. 63 - index) as well as in the element
 * index - confirm that this asymmetry is intended.
 * NOTE(review): the parameters i and index are not parenthesized in
 * VBPERMD_INDEX and EXTRACT_BIT, so callers should pass simple expressions.
 */
#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
/* Mirrored variants: element indices are reversed within the 128-bit value. */
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
(extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
int i, j;
ppc_avr_t result = { .u64 = { 0, 0 } };
VECTOR_FOR_INORDER_I(i, u64) {
for (i = 0; i < ARRAY_SIZE(result.u64); i++) {
for (j = 0; j < 8; j++) {
int index = VBPERMQ_INDEX(b, (i * 8) + j);
if (index < 64 && EXTRACT_BIT(a, i, index)) {
result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
int index = b->VsrB((i * 8) + j);
if (index < 64 && extract64(a->VsrD(i), 63 - index, 1)) {
result.VsrD(i) |= (0x80 >> j);
}
}
}
Expand All @@ -1336,12 +1323,12 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
int i;
uint64_t perm = 0;

VECTOR_FOR_INORDER_I(i, u8) {
int index = VBPERMQ_INDEX(b, i);
for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
int index = b->VsrB(i);

if (index < 128) {
uint64_t mask = (1ull << (63 - (index & 0x3F)));
if (a->u64[VBPERMQ_DW(index)] & mask) {
if (a->VsrD((index & 0x40) != 0) & mask) {
perm |= (0x8000 >> i);
}
}
Expand All @@ -1351,16 +1338,13 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
int i, j; \
trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
\
VECTOR_FOR_INORDER_I(i, srcfld) { \
for (i = 0; i < ARRAY_SIZE(r->srcfld); i++) { \
prod[i] = 0; \
for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
if (a->srcfld[i] & (1ull << j)) { \
Expand All @@ -1369,14 +1353,14 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
} \
} \
\
VECTOR_FOR_INORDER_I(i, trgfld) { \
for (i = 0; i < ARRAY_SIZE(r->trgfld); i++) { \
r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
} \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)
PMSUM(VPMSUMB, u8, u16, uint16_t)
PMSUM(VPMSUMH, u16, u32, uint32_t)
PMSUM(VPMSUMW, u32, u64, uint64_t)
#undef PMSUM

void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
Expand Down Expand Up @@ -1411,8 +1395,8 @@ void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
const ppc_avr_t *x[2] = { b, a };
#endif

VECTOR_FOR_INORDER_I(i, u64) {
VECTOR_FOR_INORDER_I(j, u32) {
for (i = 0; i < ARRAY_SIZE(result.u64); i++) {
for (j = 0; j < ARRAY_SIZE(result.u32); j++) {
uint32_t e = x[i]->u32[j];

result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
Expand All @@ -1433,7 +1417,7 @@ void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t *a0 = PKBIG ? a : b; \
ppc_avr_t *a1 = PKBIG ? b : a; \
\
VECTOR_FOR_INORDER_I(i, from) { \
for (i = 0; i < ARRAY_SIZE(result.from); i++) { \
result.to[i] = cvt(a0->from[i], &sat); \
result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
} \
Expand Down Expand Up @@ -2876,7 +2860,7 @@ uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
int i;
VECTOR_FOR_INORDER_I(i, u8) {
for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
r->u8[i] = AES_sbox[a->u8[i]];
}
}
Expand All @@ -2886,7 +2870,7 @@ void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t result;
int i;

VECTOR_FOR_INORDER_I(i, u32) {
for (i = 0; i < ARRAY_SIZE(result.u32); i++) {
result.VsrW(i) = b->VsrW(i) ^
(AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
Expand All @@ -2901,7 +2885,7 @@ void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t result;
int i;

VECTOR_FOR_INORDER_I(i, u8) {
for (i = 0; i < ARRAY_SIZE(result.u8); i++) {
result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
}
*r = result;
Expand All @@ -2914,11 +2898,11 @@ void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
int i;
ppc_avr_t tmp;

VECTOR_FOR_INORDER_I(i, u8) {
for (i = 0; i < ARRAY_SIZE(tmp.u8); i++) {
tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
}

VECTOR_FOR_INORDER_I(i, u32) {
for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
r->VsrW(i) =
AES_imc[tmp.VsrB(4 * i + 0)][0] ^
AES_imc[tmp.VsrB(4 * i + 1)][1] ^
Expand All @@ -2932,7 +2916,7 @@ void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t result;
int i;

VECTOR_FOR_INORDER_I(i, u8) {
for (i = 0; i < ARRAY_SIZE(result.u8); i++) {
result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
}
*r = result;
Expand Down
6 changes: 3 additions & 3 deletions target/ppc/translate/vmx-impl.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2708,9 +2708,6 @@ GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
GEN_VXFORM(vbpermd, 6, 23);
GEN_VXFORM(vbpermq, 6, 21);
GEN_VXFORM_TRANS(vgbbd, 6, 20);
GEN_VXFORM(vpmsumb, 4, 16)
GEN_VXFORM(vpmsumh, 4, 17)
GEN_VXFORM(vpmsumw, 4, 18)

#define GEN_BCD(op) \
static void gen_##op(DisasContext *ctx) \
Expand Down Expand Up @@ -3092,6 +3089,9 @@ static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
TRANS_FLAGS2(ALTIVEC_207, VADDCUQ, do_vx_helper, gen_helper_VADDCUQ)
TRANS_FLAGS2(ALTIVEC_207, VADDUQM, do_vx_helper, gen_helper_VADDUQM)

TRANS_FLAGS2(ALTIVEC_207, VPMSUMB, do_vx_helper, gen_helper_VPMSUMB)
TRANS_FLAGS2(ALTIVEC_207, VPMSUMH, do_vx_helper, gen_helper_VPMSUMH)
TRANS_FLAGS2(ALTIVEC_207, VPMSUMW, do_vx_helper, gen_helper_VPMSUMW)
TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD)

TRANS_FLAGS2(ALTIVEC_207, VSUBCUQ, do_vx_helper, gen_helper_VSUBCUQ)
Expand Down
3 changes: 0 additions & 3 deletions target/ppc/translate/vmx-ops.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,6 @@ GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM_300(vbpermd, 6, 23),
GEN_VXFORM_207(vbpermq, 6, 21),
GEN_VXFORM_207(vgbbd, 6, 20),
GEN_VXFORM_207(vpmsumb, 4, 16),
GEN_VXFORM_207(vpmsumh, 4, 17),
GEN_VXFORM_207(vpmsumw, 4, 18),

GEN_VXFORM_207(vsbox, 4, 23),

Expand Down