From eff03a5bc8115d8c1f92fd7703b9ac7a5e6eaa05 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Wed, 11 May 2022 16:51:47 -0300 Subject: [PATCH 1/6] target/ppc: drop VECTOR_FOR_INORDER_I usage in vpmsum[bhw] Move vpmsum[bhw] to decodetree and drop the use of VECTOR_FOR_INORDER_I in helpers, since there is no need to access the elements in any particular order. --- target/ppc/helper.h | 6 +++--- target/ppc/insn32.decode | 3 +++ target/ppc/int_helper.c | 12 ++++++------ target/ppc/translate/vmx-impl.c.inc | 6 +++--- target/ppc/translate/vmx-ops.c.inc | 3 --- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index c3fadd34fd1b..3e3cd0e26980 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -307,9 +307,9 @@ DEF_HELPER_FLAGS_1(vclzlsbb, TCG_CALL_NO_RWG, tl, avr) DEF_HELPER_FLAGS_1(vctzlsbb, TCG_CALL_NO_RWG, tl, avr) DEF_HELPER_FLAGS_3(vbpermd, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vbpermq, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vpmsumb, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vpmsumh, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vpmsumw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VPMSUMB, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VPMSUMH, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VPMSUMW, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VPMSUMD, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_2(vextublx, TCG_CALL_NO_RWG, tl, tl, avr) DEF_HELPER_FLAGS_2(vextuhlx, TCG_CALL_NO_RWG, tl, tl, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 7f6ac992cd97..f659f63b45c9 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -428,6 +428,9 @@ DSCRIQ 111111 ..... ..... ...... 001100010 . @Z22_tap_sh_rc ## Vector Exclusive-OR-based Instructions +VPMSUMB 000100 ..... ..... ..... 10000001000 @VX +VPMSUMH 000100 ..... ..... ..... 10001001000 @VX +VPMSUMW 000100 ..... ..... ..... 10010001000 @VX VPMSUMD 000100 ..... ..... ..... 
10011001000 @VX ## Vector Integer Instructions diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 6e5293e1be4a..ed971222abc6 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1360,7 +1360,7 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ int i, j; \ trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ \ - VECTOR_FOR_INORDER_I(i, srcfld) { \ + for (i = 0; i < ARRAY_SIZE(r->srcfld); i++) { \ prod[i] = 0; \ for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ if (a->srcfld[i] & (1ull << j)) { \ @@ -1369,14 +1369,14 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ } \ } \ \ - VECTOR_FOR_INORDER_I(i, trgfld) { \ + for (i = 0; i < ARRAY_SIZE(r->trgfld); i++) { \ r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ } \ } - -PMSUM(vpmsumb, u8, u16, uint16_t) -PMSUM(vpmsumh, u16, u32, uint32_t) -PMSUM(vpmsumw, u32, u64, uint64_t) +PMSUM(VPMSUMB, u8, u16, uint16_t) +PMSUM(VPMSUMH, u16, u32, uint32_t) +PMSUM(VPMSUMW, u32, u64, uint64_t) +#undef PMSUM void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a0f7befffee6..85cb1ab4ce57 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2708,9 +2708,6 @@ GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \ GEN_VXFORM(vbpermd, 6, 23); GEN_VXFORM(vbpermq, 6, 21); GEN_VXFORM_TRANS(vgbbd, 6, 20); -GEN_VXFORM(vpmsumb, 4, 16) -GEN_VXFORM(vpmsumh, 4, 17) -GEN_VXFORM(vpmsumw, 4, 18) #define GEN_BCD(op) \ static void gen_##op(DisasContext *ctx) \ @@ -3092,6 +3089,9 @@ static bool do_vx_helper(DisasContext *ctx, arg_VX *a, TRANS_FLAGS2(ALTIVEC_207, VADDCUQ, do_vx_helper, gen_helper_VADDCUQ) TRANS_FLAGS2(ALTIVEC_207, VADDUQM, do_vx_helper, gen_helper_VADDUQM) +TRANS_FLAGS2(ALTIVEC_207, VPMSUMB, do_vx_helper, gen_helper_VPMSUMB) +TRANS_FLAGS2(ALTIVEC_207, VPMSUMH, do_vx_helper, gen_helper_VPMSUMH) +TRANS_FLAGS2(ALTIVEC_207, VPMSUMW, do_vx_helper, gen_helper_VPMSUMW) TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD) TRANS_FLAGS2(ALTIVEC_207, VSUBCUQ, do_vx_helper, gen_helper_VSUBCUQ) diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index a3a0fd065005..d9cbd4d68a4d 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -230,9 +230,6 @@ GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207), GEN_VXFORM_300(vbpermd, 6, 23), GEN_VXFORM_207(vbpermq, 6, 21), GEN_VXFORM_207(vgbbd, 6, 20), -GEN_VXFORM_207(vpmsumb, 4, 16), -GEN_VXFORM_207(vpmsumh, 4, 17), -GEN_VXFORM_207(vpmsumw, 4, 18), GEN_VXFORM_207(vsbox, 4, 23), From 2f2a2416553e34de0aabe520da3bf181a4f1680b Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Fri, 27 May 2022 11:31:36 -0300 Subject: [PATCH 2/6] target/ppc: drop VECTOR_FOR_INORDER_I usage in vmsum[msu][bh][ms] We just need to operate over each element, no need for any particular order. 
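For reference, VECTOR_FOR_INORDER_I only changes the order in which the
element indices are visited, never which indices are visited. Its
definition in int_helper.c is essentially the following (reproduced here
from memory, the exact spelling in the tree may differ slightly):

    #if HOST_BIG_ENDIAN
    #define VECTOR_FOR_INORDER_I(index, element)                    \
        for (index = 0; index < ARRAY_SIZE(r->element); index++)
    #else
    #define VECTOR_FOR_INORDER_I(index, element)                    \
        for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
    #endif

Every iteration of the vmsum* loops reads a/b/c elements and the
already-computed prod[] values and writes one r element that is never
read back, so visiting the indices forward or backward gives the same
result on either host endianness; a plain forward loop is equivalent.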
--- target/ppc/int_helper.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index ed971222abc6..3acb95aa7586 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1014,7 +1014,7 @@ void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) prod[i] = (int32_t)a->s8[i] * b->u8[i]; } - VECTOR_FOR_INORDER_I(i, s32) { + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + prod[4 * i + 2] + prod[4 * i + 3]; } @@ -1029,7 +1029,7 @@ void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) prod[i] = a->s16[i] * b->s16[i]; } - VECTOR_FOR_INORDER_I(i, s32) { + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; } } @@ -1045,7 +1045,7 @@ void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, prod[i] = (int32_t)a->s16[i] * b->s16[i]; } - VECTOR_FOR_INORDER_I(i, s32) { + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; r->u32[i] = cvtsdsw(t, &sat); @@ -1065,7 +1065,7 @@ void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) prod[i] = a->u8[i] * b->u8[i]; } - VECTOR_FOR_INORDER_I(i, u32) { + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + prod[4 * i + 2] + prod[4 * i + 3]; } @@ -1080,7 +1080,7 @@ void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) prod[i] = a->u16[i] * b->u16[i]; } - VECTOR_FOR_INORDER_I(i, u32) { + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; } } @@ -1096,7 +1096,7 @@ void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, prod[i] = a->u16[i] * b->u16[i]; } - VECTOR_FOR_INORDER_I(i, s32) { + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; r->u32[i] = cvtuduw(t, &sat); From b29bd3d1e690c13dfcc9663ebc91736f899866b1 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Fri, 27 May 2022 11:34:29 -0300 Subject: [PATCH 3/6] target/ppc: drop VECTOR_FOR_INORDER_I usage in Vector AES insns We just need to operate over each element, no need for any particular order. 
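The helpers changed here are either a pure per-element map (vsbox reads
a->u8[i] and writes r->u8[i] within the same iteration) or they
accumulate into a local ppc_avr_t and store it to *r once after the
loop, so the visiting order cannot be observed even when r aliases a or
b. For example, vcipherlast after this patch is simply (body reproduced
from the hunk below):

    void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
    {
        ppc_avr_t result;
        int i;

        for (i = 0; i < ARRAY_SIZE(result.u8); i++) {
            result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
        }
        *r = result;
    }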
--- target/ppc/int_helper.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 3acb95aa7586..86d20b5fd3de 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -2876,7 +2876,7 @@ uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) { int i; - VECTOR_FOR_INORDER_I(i, u8) { + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { r->u8[i] = AES_sbox[a->u8[i]]; } } @@ -2886,7 +2886,7 @@ void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) ppc_avr_t result; int i; - VECTOR_FOR_INORDER_I(i, u32) { + for (i = 0; i < ARRAY_SIZE(result.u32); i++) { result.VsrW(i) = b->VsrW(i) ^ (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ @@ -2901,7 +2901,7 @@ void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) ppc_avr_t result; int i; - VECTOR_FOR_INORDER_I(i, u8) { + for (i = 0; i < ARRAY_SIZE(result.u8); i++) { result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); } *r = result; @@ -2914,11 +2914,11 @@ void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) int i; ppc_avr_t tmp; - VECTOR_FOR_INORDER_I(i, u8) { + for (i = 0; i < ARRAY_SIZE(tmp.u8); i++) { tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; } - VECTOR_FOR_INORDER_I(i, u32) { + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { r->VsrW(i) = AES_imc[tmp.VsrB(4 * i + 0)][0] ^ AES_imc[tmp.VsrB(4 * i + 1)][1] ^ @@ -2932,7 +2932,7 @@ void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) ppc_avr_t result; int i; - VECTOR_FOR_INORDER_I(i, u8) { + for (i = 0; i < ARRAY_SIZE(result.u8); i++) { result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); } *r = result; From 09e38f9b7efc6ea137eeacd975a6553d3dc446db Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Mon, 30 May 2022 09:27:14 -0300 Subject: [PATCH 4/6] target/ppc: drop VECTOR_FOR_INORDER_I usage in vbperm[dq] We just need to operate over each element, no need for any particular order. Also, use Vsr[BD] instead of the VBPERM[DQ]_INDEX macros, they are essentially the same thing. 
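For context, the Vsr[BD] accessors already encapsulate the
host-endianness remapping that the removed macros open-coded. They are
defined in cpu.h along these lines (exact form may differ):

    #if HOST_BIG_ENDIAN
    #define VsrB(i) u8[i]
    #define VsrD(i) u64[i]
    #else
    #define VsrB(i) u8[15 - (i)]
    #define VsrD(i) u64[1 - (i)]
    #endif

so b->VsrB(i) selects the same byte as VBPERMQ_INDEX(b, i) did, with
elements numbered in the PowerPC (big-endian) convention on either
host, and a->VsrD(i)/result.VsrD(i) replace the open-coded doubleword
index adjustments.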
--- target/ppc/int_helper.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 86d20b5fd3de..26c7b1590890 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1303,28 +1303,15 @@ XXGENPCV(XXGENPCVDM, 8) #undef XXGENPCV_LE_COMP #undef XXGENPCV -#if HOST_BIG_ENDIAN -#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) -#define VBPERMD_INDEX(i) (i) -#define VBPERMQ_DW(index) (((index) & 0x40) != 0) -#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) -#else -#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) -#define VBPERMD_INDEX(i) (1 - i) -#define VBPERMQ_DW(index) (((index) & 0x40) == 0) -#define EXTRACT_BIT(avr, i, index) \ - (extract64((avr)->u64[1 - i], 63 - index, 1)) -#endif - void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int i, j; ppc_avr_t result = { .u64 = { 0, 0 } }; - VECTOR_FOR_INORDER_I(i, u64) { + for (i = 0; i < ARRAY_SIZE(result.u64); i++) { for (j = 0; j < 8; j++) { - int index = VBPERMQ_INDEX(b, (i * 8) + j); - if (index < 64 && EXTRACT_BIT(a, i, index)) { - result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); + int index = b->VsrB((i * 8) + j); + if (index < 64 && extract64(a->VsrD(i), 63 - index, 1)) { + result.VsrD(i) |= (0x80 >> j); } } } @@ -1336,12 +1323,12 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) int i; uint64_t perm = 0; - VECTOR_FOR_INORDER_I(i, u8) { - int index = VBPERMQ_INDEX(b, i); + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { + int index = b->VsrB(i); if (index < 128) { uint64_t mask = (1ull << (63 - (index & 0x3F))); - if (a->u64[VBPERMQ_DW(index)] & mask) { + if (a->VsrD((index & 0x40) != 0) & mask) { perm |= (0x8000 >> i); } } @@ -1351,9 +1338,6 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) r->VsrD(1) = 0; } -#undef VBPERMQ_INDEX -#undef VBPERMQ_DW - #define PMSUM(name, srcfld, trgfld, trgtyp) \ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ From 887b6765bac84d5031335398b61ed4855ad5fa15 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 31 May 2022 09:05:44 -0300 Subject: [PATCH 5/6] target/ppc: drop VECTOR_FOR_INORDER_I usage in vpkpx The helper does not operate in place, elements already processed are not reused in the calculations, and the index is only used to operate over each element, so there is no order requirement. --- target/ppc/int_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 26c7b1590890..faa2291dfe54 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1395,8 +1395,8 @@ void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) const ppc_avr_t *x[2] = { b, a }; #endif - VECTOR_FOR_INORDER_I(i, u64) { - VECTOR_FOR_INORDER_I(j, u32) { + for (i = 0; i < ARRAY_SIZE(result.u64); i++) { + for (j = 0; j < ARRAY_SIZE(result.u32); j++) { uint32_t e = x[i]->u32[j]; result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | From 35c6feee282ada1e9449b1ae775f4291b13ef31f Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 31 May 2022 09:15:22 -0300 Subject: [PATCH 6/6] target/ppc: drop VECTOR_FOR_INORDER_I usage in vpk[su][hwd][su][sm] As in vpkpx, the helpers do not operate in place, elements already processed are not reused in the calculations, and the index is only used to operate over each element, so the use of VECTOR_FOR_INORDER_I is not required. This macro is also removed, as this is its last use. 
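For contrast, here is a hypothetical helper (not from the tree, shown
only as an illustration) where the conditions above do not hold:
element i reuses the result already computed for element i - 1, so the
loop direction is part of the semantics and could not be changed
freely:

    /* Hypothetical example only: a running sum across the vector. */
    void helper_example_prefix_sum(ppc_avr_t *r, ppc_avr_t *a)
    {
        int i;

        r->u32[0] = a->u32[0];
        for (i = 1; i < ARRAY_SIZE(r->u32); i++) {
            /* reads the element written on the previous iteration */
            r->u32[i] = r->u32[i - 1] + a->u32[i];
        }
    }

None of the helpers touched in this series has such a dependence: they
read only their source operands, write each destination element exactly
once, and never read a value produced earlier in the same loop.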
---
 target/ppc/int_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index faa2291dfe54..0b01705a0255 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1417,7 +1417,7 @@ void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
         ppc_avr_t *a0 = PKBIG ? a : b;                              \
         ppc_avr_t *a1 = PKBIG ? b : a;                              \
                                                                     \
-        VECTOR_FOR_INORDER_I(i, from) {                             \
+        for (i = 0; i < ARRAY_SIZE(result.from); i++) {             \
             result.to[i] = cvt(a0->from[i], &sat);                  \
             result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\