From e923320ee8498ffda5d3579d2800e81469be6684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Tue, 26 Sep 2023 02:55:15 -0700 Subject: [PATCH] ppc: support 32bit AIX and OSX Since c0270dc (Support floating point arguments by more ABIs., 2017-10-21) tests have been failing on 32bit AIX, because the ABI it uses is slightly different than the one implemented. Reuse the 64bit implementation for 32bit AIX, but add the ability to skip two 32bit general registers for each 64bit floating point argument. To support a fourth argument when all three previous ones are doubles r9 had to be made accessible as a scratch register, so add r11 to replace it as TMP_REG1, with the caveat it might be clobbered by inter-module calls. --- sljit_src/sljitConfigInternal.h | 2 +- sljit_src/sljitNativePPC_64.c | 55 -------------------- sljit_src/sljitNativePPC_common.c | 86 +++++++++++++++++++++++++++---- test_src/sljitTest.c | 62 ++++++++++++++++------ 4 files changed, 122 insertions(+), 83 deletions(-) diff --git a/sljit_src/sljitConfigInternal.h b/sljit_src/sljitConfigInternal.h index 9f1fc1b1..08007dd7 100644 --- a/sljit_src/sljitConfigInternal.h +++ b/sljit_src/sljitConfigInternal.h @@ -570,7 +570,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) -#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_REGISTERS 24 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 diff --git a/sljit_src/sljitNativePPC_64.c b/sljit_src/sljitNativePPC_64.c index b3cf9d07..bf8ba54b 100644 --- a/sljit_src/sljitNativePPC_64.c +++ b/sljit_src/sljitNativePPC_64.c @@ -504,61 +504,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) -{ - sljit_s32 arg_count
= 0; - sljit_s32 word_arg_count = 0; - sljit_s32 types = 0; - sljit_s32 reg = 0; - - if (src) - reg = *src & REG_MASK; - - arg_types >>= SLJIT_ARG_SHIFT; - - while (arg_types) { - types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - case SLJIT_ARG_TYPE_F32: - arg_count++; - break; - default: - arg_count++; - word_arg_count++; - - if (arg_count != word_arg_count && arg_count == reg) { - FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg))); - *src = TMP_CALL_REG; - } - break; - } - - arg_types >>= SLJIT_ARG_SHIFT; - } - - while (types) { - switch (types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - case SLJIT_ARG_TYPE_F32: - arg_count--; - break; - default: - if (arg_count != word_arg_count) - FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count))); - - arg_count--; - word_arg_count--; - break; - } - - types >>= SLJIT_ARG_SHIFT; - } - - return SLJIT_SUCCESS; -} - static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) { FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); diff --git a/sljit_src/sljitNativePPC_common.c b/sljit_src/sljitNativePPC_common.c index 54977f02..d1fc2c9d 100644 --- a/sljit_src/sljitNativePPC_common.c +++ b/sljit_src/sljitNativePPC_common.c @@ -104,8 +104,8 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { - 0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12 +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 3, 4, 5, 6, 7, 8, 9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 11, 10, 31, 12 }; static const sljit_u8 
freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { @@ -804,9 +804,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 i, tmp, base, offset; sljit_s32 word_arg_count = 0; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__) sljit_s32 arg_count = 0; -#endif +#endif /* SLJIT_CONFIG_PPC_64 || AIX32 || OSX */ CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); @@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__) do { if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { tmp = SLJIT_S0 - saved_arg_count; @@ -905,7 +905,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi word_arg_count++; } -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__) +#if defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32 + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + arg_count++; +#endif /* AIX32 || OSX */ arg_count++; #endif arg_types >>= SLJIT_ARG_SHIFT; @@ -2433,6 +2437,68 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type } } +#if defined(_AIX) || defined(__APPLE__) || (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 reg = src ? 
(*src & REG_MASK) : -1; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: +#if (defined(_AIX) || defined(__APPLE__)) && (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) + arg_count++; +#endif /* AIX32 || OSX */ + /* fallthrough */ + case SLJIT_ARG_TYPE_F32: + break; + default: + word_arg_count++; + + if (arg_count != word_arg_count && arg_count == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg))); + *src = TMP_CALL_REG; + } + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: +#if (defined(_AIX) || defined(__APPLE__)) && (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) + arg_count--; +#endif /* AIX32 || OSX */ + /* fallthrough */ + case SLJIT_ARG_TYPE_F32: + break; + default: + if (arg_count != word_arg_count) + FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count))); + + word_arg_count--; + break; + } + arg_count--; + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif /* AIX || OSX || SLJIT_CONFIG_PPC_64 */ + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; @@ -2476,10 +2542,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__) if ((type & 0xff) != SLJIT_CALL_REG_ARG) PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); -#endif +#endif /* SLJIT_CONFIG_PPC_64 || AIX32 || OSX */ if (type & SLJIT_CALL_RETURN) { PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); @@ -2560,10 +2626,10 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi type = SLJIT_JUMP; } -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__) if ((type & 0xff) != SLJIT_CALL_REG_ARG) FAIL_IF(call_with_args(compiler, arg_types, &src)); -#endif +#endif /* SLJIT_CONFIG_PPC_64 || AIX32 || OSX */ SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index db8026e3..836ece5a 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -6119,7 +6119,12 @@ static sljit_f32 test58_f5(sljit_f32 a, sljit_f64 b, sljit_s32 c) return (sljit_f32)((sljit_f64)a + b + (sljit_f64)c); } -static sljit_sw test58_f6(sljit_f64 a, sljit_sw b) +static sljit_f64 test58_f6(sljit_f64 a, sljit_f64 b, sljit_uw c) +{ + return a * b * (sljit_f64)c; +} + +static sljit_sw test58_f7(sljit_f64 a, sljit_sw b) { return (sljit_sw)(a + (sljit_f64)b); } @@ -6130,7 +6135,7 @@ static void test58(void) executable_code code; struct sljit_compiler* compiler; struct sljit_jump* jump = NULL; - sljit_f64 dbuf[7]; + sljit_f64 dbuf[8]; sljit_f32 sbuf[7]; sljit_sw wbuf[2]; @@ -6153,7 +6158,7 @@ static void test58(void) dbuf[3] = 0.0; dbuf[4] = 0.0; dbuf[5] = 0.0; - dbuf[6] = -18.0; + dbuf[7] = -18.0; sbuf[0] = 6.75f; sbuf[1] = -3.5f; @@ -6164,7 +6169,7 @@ static void test58(void) wbuf[0] = 0; wbuf[1] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 4, 0, sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); @@ -6210,19 +6215,27 @@ static void test58(void) sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); 
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F32, F32, F64, 32), SLJIT_MEM1(SLJIT_SP), 0); /* sbuf[4] */ sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f6)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10000000); + sljit_set_target(sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, F64, F64, W)), SLJIT_FUNC_UADDR(test58_f6)); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f7)); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R0, 0); /* wbuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 319); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f6)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f7)); sljit_emit_icall(compiler, SLJIT_CALL, 
SLJIT_ARGS2(W, F64, W), SLJIT_R1, 0); /* wbuf[1] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_R0, 0); @@ -6237,12 +6250,13 @@ static void test58(void) FAILED(dbuf[1] != 8.5, "test58 case 1 failed\n"); FAILED(dbuf[3] != 0.5, "test58 case 2 failed\n"); - FAILED(sbuf[3] != 17.75, "test58 case 3 failed\n"); + FAILED(sbuf[3] != 17.75f, "test58 case 3 failed\n"); FAILED(dbuf[4] != 11.75, "test58 case 4 failed\n"); FAILED(dbuf[5] != -9.5, "test58 case 5 failed\n"); - FAILED(sbuf[4] != 12, "test58 case 6 failed\n"); - FAILED(wbuf[0] != SLJIT_FUNC_ADDR(test58_f6) - 18, "test58 case 7 failed\n"); - FAILED(wbuf[1] != 301, "test58 case 8 failed\n"); + FAILED(sbuf[4] != 12.0f, "test58 case 6 failed\n"); + FAILED(dbuf[6] != -945000000.0, "test58 case 7 failed\n"); + FAILED(wbuf[0] != SLJIT_FUNC_ADDR(test58_f7) - 18, "test58 case 8 failed\n"); + FAILED(wbuf[1] != 301, "test58 case 9 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -6268,6 +6282,11 @@ static sljit_f32 test59_f4(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_f64 d) return (sljit_f32)(a + b + c + (sljit_f64)d); } +static sljit_f64 test59_f5(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_s32 d) +{ + return a + b + c + (sljit_f64)d; +} + static void test59(void) { /* Check function calls with four arguments. 
*/ @@ -6275,7 +6294,7 @@ static void test59(void) struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); struct sljit_jump* jump = NULL; sljit_sw wbuf[5]; - sljit_f64 dbuf[3]; + sljit_f64 dbuf[4]; sljit_f32 sbuf[4]; if (verbose) @@ -6292,10 +6311,10 @@ static void test59(void) dbuf[1] = 6.125; dbuf[2] = 4.25; - sbuf[0] = 0.75; - sbuf[1] = -1.5; - sbuf[2] = 0.0; - sbuf[3] = 0.0; + sbuf[0] = 0.75f; + sbuf[1] = -1.5f; + sbuf[2] = 0.0f; + sbuf[3] = 0.0f; } FAILED(!compiler, "cannot create compiler\n"); @@ -6353,6 +6372,14 @@ static void test59(void) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(F32, F32, F64, F32, F64), SLJIT_R0, 0); /* sbuf[2] */ sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_IMM, 42); + sljit_set_target(sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS4(F64, F64, F64, F64, 32)), SLJIT_FUNC_UADDR(test59_f5)); + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); } sljit_emit_return_void(compiler); @@ -6371,6 +6398,7 @@ static void test59(void) FAILED(wbuf[4] != -88, "test59 case 4 failed\n"); FAILED(sbuf[2] != 79.75f, "test59 case 5 failed\n"); FAILED(sbuf[3] != 8.625f, "test59 case 6 failed\n"); + FAILED(dbuf[3] != 57.5, "test59 case 7 failed\n"); } sljit_free_code(code.code, NULL);