Skip to content

Commit

Permalink
ppc: support 32bit AIX and OSX
Browse files Browse the repository at this point in the history
Since c0270dc (Support floating point arguments by more ABIs., 2017-10-21)
tests had been failing in 32bit AIX, because the ABI it uses is slightly
different than the one implemented.

Reuse the 64bit implementation for 32bit AIX, but add the ability to skip
two 32bit general registers for each 64bit floating point argument.

To support a fourth argument when all three previous ones are doubles,
r9 had to be made accessible as a scratch register, so add r11 to replace it
as TMP_REG1, with the caveat that it might be clobbered by inter-module calls.
  • Loading branch information
carenas committed Oct 3, 2023
1 parent 292bf2e commit e923320
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 83 deletions.
2 changes: 1 addition & 1 deletion sljit_src/sljitConfigInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);

#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)

#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_REGISTERS 24
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
Expand Down
55 changes: 0 additions & 55 deletions sljit_src/sljitNativePPC_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,61 +504,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
}

/*
 * Rearranges the word (non floating point) arguments of an outgoing call.
 *
 * Every argument, floating point or not, occupies one argument slot here,
 * while word arguments are initially held in consecutively numbered
 * registers. Each word argument is therefore copied from the register
 * numbered by its position among the word arguments to the register
 * numbered by its position among all arguments.
 *
 * compiler:  compiler instance the instructions are pushed to.
 * arg_types: packed argument types; the lowest entry is shifted out before
 *            processing (presumably the return type - confirm against the
 *            SLJIT_ARGS macros).
 * src:       optional (may be NULL) pointer to the call target operand. If
 *            its register would be overwritten by one of the copies, it is
 *            first copied to TMP_CALL_REG and *src is updated accordingly.
 *
 * Returns SLJIT_SUCCESS, or leaves early through FAIL_IF if push_inst fails.
 */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
{
	sljit_s32 total = 0;
	sljit_s32 words = 0;
	sljit_s32 reversed = 0;
	sljit_s32 src_reg = src ? (*src & REG_MASK) : 0;
	sljit_s32 kind;

	/* First pass: count the arguments and record their types in reverse order. */
	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types; arg_types >>= SLJIT_ARG_SHIFT) {
		kind = arg_types & SLJIT_ARG_MASK;
		reversed = (reversed << SLJIT_ARG_SHIFT) | kind;
		total++;

		if (kind == SLJIT_ARG_TYPE_F64 || kind == SLJIT_ARG_TYPE_F32)
			continue;

		words++;

		/* The call target lives in a register that a later copy overwrites: rescue it. */
		if (total == src_reg && total != words) {
			FAIL_IF(push_inst(compiler, OR | S(src_reg) | A(TMP_CALL_REG) | B(src_reg)));
			*src = TMP_CALL_REG;
		}
	}

	/* Second pass: last argument first, because a destination index is never
	   lower than its source index, so no pending source gets overwritten. */
	for (; reversed; reversed >>= SLJIT_ARG_SHIFT) {
		kind = reversed & SLJIT_ARG_MASK;

		if (kind != SLJIT_ARG_TYPE_F64 && kind != SLJIT_ARG_TYPE_F32) {
			if (total != words) {
				FAIL_IF(push_inst(compiler, OR | S(words) | A(total) | B(words)));
			}

			words--;
		}

		total--;
	}

	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
Expand Down
86 changes: 76 additions & 10 deletions sljit_src/sljitNativePPC_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12
/* NOTE(review): presumably translates sljit register indexes to PowerPC
   general purpose register numbers - confirm against the users of reg_map.
   According to the commit message, r9 (index 7) becomes reachable as a
   scratch register and r11 takes over its former role as TMP_REG1. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
0, 3, 4, 5, 6, 7, 8, 9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 11, 10, 31, 12
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
Expand Down Expand Up @@ -804,9 +804,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 i, tmp, base, offset;
sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__)
sljit_s32 arg_count = 0;
#endif
#endif /* SLJIT_CONFIG_PPC_64 || AIX32 || OSX */

CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
Expand Down Expand Up @@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi

while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__)
do {
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = SLJIT_S0 - saved_arg_count;
Expand All @@ -905,7 +905,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
word_arg_count++;
}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__)
#if defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32
if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
arg_count++;
#endif /* AIX32 || OSX */
arg_count++;
#endif
arg_types >>= SLJIT_ARG_SHIFT;
Expand Down Expand Up @@ -2433,6 +2437,68 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type
}
}

#if defined(_AIX) || defined(__APPLE__) || (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)

/*
 * Rearranges the word (non floating point) arguments of an outgoing call.
 *
 * Word arguments are initially held in consecutively numbered registers,
 * while the callee expects each of them in the register numbered by its
 * argument slot. Every argument takes one slot, except that on 32bit AIX
 * and OSX an F64 argument takes two.
 *
 * compiler:  compiler instance the instructions are pushed to.
 * arg_types: packed argument types; the lowest entry is shifted out before
 *            processing (presumably the return type - confirm against the
 *            SLJIT_ARGS macros).
 * src:       optional (may be NULL) pointer to the call target operand. If
 *            its register would be overwritten by one of the copies, it is
 *            first copied to TMP_CALL_REG and *src is updated accordingly.
 *
 * Returns SLJIT_SUCCESS, or leaves early through FAIL_IF if push_inst fails.
 */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
{
	sljit_s32 slot = 0;
	sljit_s32 words = 0;
	sljit_s32 reversed = 0;
	sljit_s32 src_reg = -1;
	sljit_s32 kind;

	if (src)
		src_reg = *src & REG_MASK;

	/* First pass: count the slots and record the argument types in reverse order. */
	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types; arg_types >>= SLJIT_ARG_SHIFT) {
		kind = arg_types & SLJIT_ARG_MASK;
		reversed = (reversed << SLJIT_ARG_SHIFT) | kind;
		slot++;

		if (kind == SLJIT_ARG_TYPE_F32)
			continue;

		if (kind == SLJIT_ARG_TYPE_F64) {
#if (defined(_AIX) || defined(__APPLE__)) && (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32)
			/* A double takes the place of two 32bit general registers. */
			slot++;
#endif /* AIX32 || OSX */
			continue;
		}

		words++;

		/* The call target lives in a register that a later copy overwrites: rescue it. */
		if (slot == src_reg && slot != words) {
			FAIL_IF(push_inst(compiler, OR | S(src_reg) | A(TMP_CALL_REG) | B(src_reg)));
			*src = TMP_CALL_REG;
		}
	}

	/* Second pass: last argument first, because a destination index is never
	   lower than its source index, so no pending source gets overwritten. */
	for (; reversed; reversed >>= SLJIT_ARG_SHIFT, slot--) {
		kind = reversed & SLJIT_ARG_MASK;

		if (kind == SLJIT_ARG_TYPE_F32)
			continue;

		if (kind == SLJIT_ARG_TYPE_F64) {
#if (defined(_AIX) || defined(__APPLE__)) && (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32)
			/* Undo the extra slot accounted for in the first pass. */
			slot--;
#endif /* AIX32 || OSX */
			continue;
		}

		if (slot != words) {
			FAIL_IF(push_inst(compiler, OR | S(words) | A(slot) | B(words)));
		}

		words--;
	}

	return SLJIT_SUCCESS;
}

#endif /* AIX || OSX || SLJIT_CONFIG_PPC_64 */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
struct sljit_jump *jump;
Expand Down Expand Up @@ -2476,10 +2542,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__)
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
#endif
#endif /* SLJIT_CONFIG_PPC_64 || AIX32 || OSX */

if (type & SLJIT_CALL_RETURN) {
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
Expand Down Expand Up @@ -2560,10 +2626,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
type = SLJIT_JUMP;
}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) || defined(_AIX) || defined(__APPLE__)
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
FAIL_IF(call_with_args(compiler, arg_types, &src));
#endif
#endif /* SLJIT_CONFIG_PPC_64 || AIX32 || OSX */

SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
Expand Down
62 changes: 45 additions & 17 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -6119,7 +6119,12 @@ static sljit_f32 test58_f5(sljit_f32 a, sljit_f64 b, sljit_s32 c)
return (sljit_f32)((sljit_f64)a + b + (sljit_f64)c);
}

static sljit_sw test58_f6(sljit_f64 a, sljit_sw b)
/* Call target for test58: two doubles followed by a word argument.
   Returns the product of all three, the word converted to double first. */
static sljit_f64 test58_f6(sljit_f64 a, sljit_f64 b, sljit_uw c)
{
return a * b * (sljit_f64)c;
}

/* Call target for test58: a double followed by a word argument.
   Returns their sum, computed as a double and then converted to sljit_sw. */
static sljit_sw test58_f7(sljit_f64 a, sljit_sw b)
{
return (sljit_sw)(a + (sljit_f64)b);
}
Expand All @@ -6130,7 +6135,7 @@ static void test58(void)
executable_code code;
struct sljit_compiler* compiler;
struct sljit_jump* jump = NULL;
sljit_f64 dbuf[7];
sljit_f64 dbuf[8];
sljit_f32 sbuf[7];
sljit_sw wbuf[2];

Expand All @@ -6153,7 +6158,7 @@ static void test58(void)
dbuf[3] = 0.0;
dbuf[4] = 0.0;
dbuf[5] = 0.0;
dbuf[6] = -18.0;
dbuf[7] = -18.0;

sbuf[0] = 6.75f;
sbuf[1] = -3.5f;
Expand All @@ -6164,7 +6169,7 @@ static void test58(void)
wbuf[0] = 0;
wbuf[1] = 0;

sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 4, 0, sizeof(sljit_sw));
sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 4, 3, 4, 0, sizeof(sljit_sw));

sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32));
Expand Down Expand Up @@ -6210,19 +6215,27 @@ static void test58(void)
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32));
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F32, F32, F64, 32), SLJIT_MEM1(SLJIT_SP), 0);
/* sbuf[4] */
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR0, 0);

sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f6));
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10000000);
sljit_set_target(sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, F64, F64, W)), SLJIT_FUNC_UADDR(test58_f6));
/* dbuf[6] */
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR0, 0);

sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f7));
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R0, 0);
/* wbuf[0] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_R0, 0);

sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64));
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 319);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f6));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f7));
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R1, 0);
/* wbuf[1] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_R0, 0);
Expand All @@ -6237,12 +6250,13 @@ static void test58(void)

FAILED(dbuf[1] != 8.5, "test58 case 1 failed\n");
FAILED(dbuf[3] != 0.5, "test58 case 2 failed\n");
FAILED(sbuf[3] != 17.75, "test58 case 3 failed\n");
FAILED(sbuf[3] != 17.75f, "test58 case 3 failed\n");
FAILED(dbuf[4] != 11.75, "test58 case 4 failed\n");
FAILED(dbuf[5] != -9.5, "test58 case 5 failed\n");
FAILED(sbuf[4] != 12, "test58 case 6 failed\n");
FAILED(wbuf[0] != SLJIT_FUNC_ADDR(test58_f6) - 18, "test58 case 7 failed\n");
FAILED(wbuf[1] != 301, "test58 case 8 failed\n");
FAILED(sbuf[4] != 12.0f, "test58 case 6 failed\n");
FAILED(dbuf[6] != -945000000.0, "test58 case 7 failed\n");
FAILED(wbuf[0] != SLJIT_FUNC_ADDR(test58_f7) - 18, "test58 case 8 failed\n");
FAILED(wbuf[1] != 301, "test58 case 9 failed\n");

sljit_free_code(code.code, NULL);
successful_tests++;
Expand All @@ -6268,14 +6282,19 @@ static sljit_f32 test59_f4(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_f64 d)
return (sljit_f32)(a + b + c + (sljit_f64)d);
}

/* Call target for test59: three doubles followed by a 32 bit integer.
   Returns the sum of all four, the integer converted to double first. */
static sljit_f64 test59_f5(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_s32 d)
{
return a + b + c + (sljit_f64)d;
}

static void test59(void)
{
/* Check function calls with four arguments. */
executable_code code;
struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL);
struct sljit_jump* jump = NULL;
sljit_sw wbuf[5];
sljit_f64 dbuf[3];
sljit_f64 dbuf[4];
sljit_f32 sbuf[4];

if (verbose)
Expand All @@ -6292,10 +6311,10 @@ static void test59(void)
dbuf[1] = 6.125;
dbuf[2] = 4.25;

sbuf[0] = 0.75;
sbuf[1] = -1.5;
sbuf[2] = 0.0;
sbuf[3] = 0.0;
sbuf[0] = 0.75f;
sbuf[1] = -1.5f;
sbuf[2] = 0.0f;
sbuf[3] = 0.0f;
}

FAILED(!compiler, "cannot create compiler\n");
Expand Down Expand Up @@ -6353,6 +6372,14 @@ static void test59(void)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(F32, F32, F64, F32, F64), SLJIT_R0, 0);
/* sbuf[2] */
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_f32), SLJIT_FR0, 0);

sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64));
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64));
sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_IMM, 42);
sljit_set_target(sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS4(F64, F64, F64, F64, 32)), SLJIT_FUNC_UADDR(test59_f5));
/* dbuf[3] */
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0);
}

sljit_emit_return_void(compiler);
Expand All @@ -6371,6 +6398,7 @@ static void test59(void)
FAILED(wbuf[4] != -88, "test59 case 4 failed\n");
FAILED(sbuf[2] != 79.75f, "test59 case 5 failed\n");
FAILED(sbuf[3] != 8.625f, "test59 case 6 failed\n");
FAILED(dbuf[3] != 57.5, "test59 case 7 failed\n");
}

sljit_free_code(code.code, NULL);
Expand Down

0 comments on commit e923320

Please sign in to comment.