diff --git a/make/autoconf/build-aux/autoconf-config.guess b/make/autoconf/build-aux/autoconf-config.guess index 1091acc872f..15ee4389269 100644 --- a/make/autoconf/build-aux/autoconf-config.guess +++ b/make/autoconf/build-aux/autoconf-config.guess @@ -1000,9 +1000,6 @@ EOF ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;; - riscv64:Linux:*:*) - echo riscv64-unknown-linux-gnu - exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; diff --git a/make/autoconf/build-aux/autoconf-config.sub b/make/autoconf/build-aux/autoconf-config.sub index b78cc3a3b3b..1aab2b303e3 100644 --- a/make/autoconf/build-aux/autoconf-config.sub +++ b/make/autoconf/build-aux/autoconf-config.sub @@ -302,7 +302,6 @@ case $basic_machine in | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ | pyramid \ - | riscv64 \ | score \ | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ @@ -384,7 +383,6 @@ case $basic_machine in | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ | pyramid-* \ - | riscv64-* \ | romp-* | rs6000-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 index 3a1d1efd19a..54e05363df7 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -371,7 +371,7 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ - test "x$OPENJDK_TARGET_CPU_ARCH" = "xriscv"; then + test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 index 4defcdb270b..5c49fd9285d 
100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -130,10 +130,9 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" fi - # Programs which use C11 or C++11 atomics, like #include , - # generally must link against -latomic on RISC-V + # Because RISC-V only has word-sized atomics, it requires libatomic where + # other common architectures do not. So link libatomic by default. if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then - BASIC_JDKLIB_LIBS="$BASIC_JDKLIB_LIBS -latomic" BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" fi diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index fa3f6b6f126..bb4d516a377 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk index 2d9f33eb754..6b6ca5b1b8e 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif + ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 61e3048a944..3c1003c1b05 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index a781fea7668..bbf96086fd4 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 95fbbe8c424..6c97e9d31fa 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2019, SAP SE. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp index 9da28e37bef..31c63abe71d 100644 --- a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,6 @@ #include "utilities/debug.hpp" #include "utilities/macros.hpp" - int AbstractInterpreter::BasicType_as_index(BasicType type) { int i = 0; switch (type) { @@ -102,7 +101,7 @@ int AbstractInterpreter::size_activation(int max_stack, // frame do we need to allow max_stack words. (is_top_frame ? max_stack : temps + extra_args); - // On riscv64 we always keep the stack pointer 16-aligned, so we + // On riscv we always keep the stack pointer 16-aligned, so we // must round up here. 
size = align_up(size, 2); @@ -134,10 +133,9 @@ void AbstractInterpreter::layout_activation(Method* method, #endif interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and - // interpreter_frame_sender_sp interpreter_frame_sender_sp is - // the original sp of the caller (the unextended_sp) and - // sender_sp is fp+8/16 (32bit/64bit) + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp intptr_t* locals = NULL; if (caller->is_interpreted_frame()) { locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -171,6 +169,7 @@ void AbstractInterpreter::layout_activation(Method* method, interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); *interpreter_frame->interpreter_frame_mirror_addr() = diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp index 323df0af865..a5f688cda1f 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -21,6 +21,7 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
+ * */ #include @@ -34,7 +35,6 @@ #include "memory/resourceArea.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/sharedRuntime.hpp" -#include "nativeInst_riscv.hpp" int AbstractAssembler::code_fill_byte() { return 0; @@ -80,11 +80,16 @@ void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) } } -void Assembler::li(Register Rd, int64_t imm) { +void Assembler::zext_w(Register Rd, Register Rs) { + add_uw(Rd, Rs, zr); +} + +void Assembler::_li(Register Rd, int64_t imm) { // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff int shift = 12; int64_t upper = imm, lower = imm; - // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. + // Split imm to a lower 12-bit sign-extended part and the remainder, + // because addi will sign-extend the lower imm. lower = ((int32_t)imm << 20) >> 20; upper -= lower; @@ -98,8 +103,7 @@ void Assembler::li(Register Rd, int64_t imm) { if (lower != 0) { addi(Rd, Rd, lower); } - } - else { + } else { // 32-bit integer Register hi_Rd = zr; if (upper != 0) { @@ -113,8 +117,8 @@ void Assembler::li(Register Rd, int64_t imm) { } void Assembler::li64(Register Rd, int64_t imm) { - // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), - // upper = imm[63:32] + 1. + // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or + // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. 
int64_t lower = imm & 0xffffffff; lower -= ((lower << 44) >> 44); int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; @@ -209,13 +213,13 @@ void Assembler::ret() { #define INSN(NAME, REGISTER) \ void Assembler::NAME(const Address &adr, Register temp) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ int32_t offset = 0; \ baseOffset(temp, adr, offset); \ jalr(REGISTER, temp, offset); \ @@ -366,4 +370,3 @@ Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _of ShouldNotReachHere(); } } - diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 928ece613c6..dc01c6112d0 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,8 +29,9 @@ #include "asm/register.hpp" #include "assembler_riscv.inline.hpp" +#include "metaprogramming/enableIf.hpp" -#define registerSize 64 +#define XLEN 64 // definitions of various symbolic names for machine registers @@ -40,10 +41,10 @@ class Argument { public: enum { - n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) - n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) 
+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) - n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) + n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) }; }; @@ -67,7 +68,21 @@ REGISTER_DECLARATION(FloatRegister, c_farg5, f15); REGISTER_DECLARATION(FloatRegister, c_farg6, f16); REGISTER_DECLARATION(FloatRegister, c_farg7, f17); -// java function register(caller-save registers) +// Symbolically name the register arguments used by the Java calling convention. +// We have control over the convention for java so we can do what we please. +// What pleases us is to offset the java calling convention so that when +// we call a suitable jni method the arguments are lined up and we don't +// have to do much shuffling. A suitable jni method is non-static and a +// small number of arguments. +// +// |------------------------------------------------------------------------| +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | +// |------------------------------------------------------------------------| +// | x10 x11 x12 x13 x14 x15 x16 x17 | +// |------------------------------------------------------------------------| +// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | +// |------------------------------------------------------------------------| + REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -77,6 +92,8 @@ REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); +// Java floating args are passed as per C + REGISTER_DECLARATION(FloatRegister, j_farg0, f10); REGISTER_DECLARATION(FloatRegister, j_farg1, f11); REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -93,15 
+110,15 @@ REGISTER_DECLARATION(Register, gp, x3); // thread pointer REGISTER_DECLARATION(Register, tp, x4); +// registers used to hold VM data either temporarily within a method +// or across method calls + // volatile (caller-save) registers // current method -- must be in a call-clobbered register REGISTER_DECLARATION(Register, xmethod, x31); // return address REGISTER_DECLARATION(Register, ra, x1); -// link rigster -REGISTER_DECLARATION(Register, lr, x1); - // non-volatile (callee-save) registers @@ -118,9 +135,6 @@ REGISTER_DECLARATION(Register, xmonitors, x25); // locals on stack REGISTER_DECLARATION(Register, xlocals, x24); -/* If you use x4(tp) as java thread pointer according to the instruction manual, - * it overlaps with the register used by c++ thread. - */ // java thread pointer REGISTER_DECLARATION(Register, xthread, x23); // bytecode pointer @@ -130,13 +144,13 @@ REGISTER_DECLARATION(Register, xdispatch, x21); // Java stack pointer REGISTER_DECLARATION(Register, esp, x20); -// tempory register(caller-save registers) +// temporary register(caller-save registers) REGISTER_DECLARATION(Register, t0, x5); REGISTER_DECLARATION(Register, t1, x6); REGISTER_DECLARATION(Register, t2, x7); const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { - c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 + c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 }; const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -168,22 +182,22 @@ class Address { Address() : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } Address(Register r) - : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } Address(Register r, int o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), 
_target(NULL) { } Address(Register r, long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, unsigned int o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, unsigned long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, unsigned long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } #ifdef ASSERT Address(Register r, ByteSize disp) - : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } #endif Address(address target, RelocationHolder const& rspec) : _base(noreg), @@ -208,7 +222,7 @@ class Address { return _mode; } - bool uses(Register reg) const { return _base == reg;} + bool uses(Register reg) const { return _base == reg; } const address target() const { return _target; } const RelocationHolder& rspec() const { return _rspec; } ~Address() { @@ -258,21 +272,11 @@ class InternalAddress: public Address { ~InternalAddress() {} }; -const int FPUStateSizeInWords = 32 * 2; - class Assembler : public AbstractAssembler { public: enum { instruction_size = 4 }; - //---< calculate length of instruction >--- - // We just use the values set above. 
- // instruction must start at passed address - static unsigned int instr_len(unsigned char *instr) { return instruction_size; } - - //---< longest instructions >--- - static unsigned int instr_maxlen() { return instruction_size; } - enum RoundingMode { rne = 0b000, // round to Nearest, ties to Even rtz = 0b001, // round towards Zero @@ -282,33 +286,33 @@ class Assembler : public AbstractAssembler { rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. }; - void baseOffset32(Register temp, const Address &adr, int32_t &offset) { - assert(temp != noreg, "temp must not be empty register!"); - guarantee(adr.base() != temp, "should use different registers!"); + void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { + assert(Rd != noreg, "Rd must not be empty register!"); + guarantee(Rd != adr.base(), "should use different registers!"); if (is_offset_in_range(adr.offset(), 32)) { int32_t imm = adr.offset(); int32_t upper = imm, lower = imm; lower = (imm << 20) >> 20; upper -= lower; - lui(temp, upper); + lui(Rd, upper); offset = lower; } else { - movptr_with_offset(temp, (address)(uintptr_t)adr.offset(), offset); + movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); } - add(temp, temp, adr.base()); + add(Rd, Rd, adr.base()); } - void baseOffset(Register temp, const Address &adr, int32_t &offset) { + void baseOffset(Register Rd, const Address &adr, int32_t &offset) { if (is_offset_in_range(adr.offset(), 12)) { - assert(temp != noreg, "temp must not be empty register!"); - addi(temp, adr.base(), adr.offset()); + assert(Rd != noreg, "Rd must not be empty register!"); + addi(Rd, adr.base(), adr.offset()); offset = 0; } else { - baseOffset32(temp, adr, offset); + baseOffset32(Rd, adr, offset); } } - void li(Register Rd, int64_t imm); // optimized load immediate + void _li(Register Rd, int64_t imm); // optimized load immediate void li32(Register Rd, int32_t imm); void li64(Register Rd, int64_t 
imm); void movptr(Register Rd, address addr); @@ -316,7 +320,7 @@ class Assembler : public AbstractAssembler { void movptr(Register Rd, uintptr_t imm64); void ifence(); void j(const address &dest, Register temp = t0); - void j(const Address &adr, Register temp = t0) ; + void j(const Address &adr, Register temp = t0); void j(Label &l, Register temp = t0); void jal(Label &l, Register temp = t0); void jal(const address &dest, Register temp = t0); @@ -385,7 +389,7 @@ class Assembler : public AbstractAssembler { emit_int32((jint)insn); } - void halt() { + void _halt() { emit_int32(0); } @@ -402,18 +406,18 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(add, 0b0110011, 0b000, 0b0000000); - INSN(sub, 0b0110011, 0b000, 0b0100000); - INSN(andr, 0b0110011, 0b111, 0b0000000); - INSN(orr, 0b0110011, 0b110, 0b0000000); - INSN(xorr, 0b0110011, 0b100, 0b0000000); + INSN(_add, 0b0110011, 0b000, 0b0000000); + INSN(_sub, 0b0110011, 0b000, 0b0100000); + INSN(_andr, 0b0110011, 0b111, 0b0000000); + INSN(_orr, 0b0110011, 0b110, 0b0000000); + INSN(_xorr, 0b0110011, 0b100, 0b0000000); INSN(sll, 0b0110011, 0b001, 0b0000000); INSN(sra, 0b0110011, 0b101, 0b0100000); INSN(srl, 0b0110011, 0b101, 0b0000000); INSN(slt, 0b0110011, 0b010, 0b0000000); INSN(sltu, 0b0110011, 0b011, 0b0000000); - INSN(addw, 0b0111011, 0b000, 0b0000000); - INSN(subw, 0b0111011, 0b000, 0b0100000); + INSN(_addw, 0b0111011, 0b000, 0b0000000); + INSN(_subw, 0b0111011, 0b000, 0b0100000); INSN(sllw, 0b0111011, 0b001, 0b0000000); INSN(sraw, 0b0111011, 0b101, 0b0100000); INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -431,9 +435,6 @@ class Assembler : public AbstractAssembler { INSN(remw, 0b0111011, 0b110, 0b0000001); INSN(remuw, 0b0111011, 0b111, 0b0000001); - // Vector Configuration Instruction - INSN(vsetvl, 0b1010111, 0b111, 0b1000000); - #undef INSN #define INSN_ENTRY_RELOC(result_type, header) \ @@ -443,11 +444,11 @@ class Assembler : public AbstractAssembler { "only internal_word_type relocs make 
sense here"); \ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); -// Load/store register (all modes) + // Load/store register (all modes) #define INSN(NAME, op, funct3) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ int32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -455,7 +456,19 @@ class Assembler : public AbstractAssembler { patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 31, 20, val); \ emit(insn); \ - } \ + } + + INSN(lb, 0b0000011, 0b000); + INSN(lbu, 0b0000011, 0b100); + INSN(lh, 0b0000011, 0b001); + INSN(lhu, 0b0000011, 0b101); + INSN(_lw, 0b0000011, 0b010); + INSN(lwu, 0b0000011, 0b110); + INSN(_ld, 0b0000011, 0b011); + +#undef INSN + +#define INSN(NAME) \ void NAME(Register Rd, address dest) { \ assert_cond(dest != NULL); \ int64_t distance = (dest - pc()); \ @@ -472,13 +485,13 @@ class Assembler : public AbstractAssembler { NAME(Rd, dest); \ } \ void NAME(Register Rd, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rd, adr.target()); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rd, adr.base(), adr.offset()); \ } else { \ @@ -501,20 +514,20 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, &Assembler::NAME); \ } - INSN(lb, 0b0000011, 0b000); - INSN(lbu, 0b0000011, 0b100); - INSN(ld, 0b0000011, 0b011); - INSN(lh, 0b0000011, 0b001); - INSN(lhu, 0b0000011, 0b101); - INSN(lw, 0b0000011, 0b010); - INSN(lwu, 0b0000011, 0b110); + INSN(lb); + INSN(lbu); + INSN(lh); + INSN(lhu); + INSN(lw); + INSN(lwu); + INSN(ld); #undef INSN #define INSN(NAME, op, funct3) \ void NAME(FloatRegister Rd, Register Rs, const 
int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -522,7 +535,14 @@ class Assembler : public AbstractAssembler { patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 31, 20, val); \ emit(insn); \ - } \ + } + + INSN(flw, 0b0000111, 0b010); + INSN(_fld, 0b0000111, 0b011); + +#undef INSN + +#define INSN(NAME) \ void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ int64_t distance = (dest - pc()); \ @@ -539,13 +559,13 @@ class Assembler : public AbstractAssembler { NAME(Rd, dest, temp); \ } \ void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rd, adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rd, adr.base(), adr.offset()); \ } else { \ @@ -560,14 +580,14 @@ class Assembler : public AbstractAssembler { } \ } - INSN(flw, 0b0000111, 0b010); - INSN(fld, 0b0000111, 0b011); + INSN(flw); + INSN(fld); #undef INSN #define INSN(NAME, op, funct3) \ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + unsigned insn = 0; \ uint32_t val = offset & 0x1fff; \ uint32_t val11 = (val >> 11) & 0x1; \ uint32_t val12 = (val >> 12) & 0x1; \ @@ -582,7 +602,18 @@ class Assembler : public AbstractAssembler { patch((address)&insn, 30, 25, high); \ patch((address)&insn, 31, val12); \ emit(insn); \ - } \ + } + + INSN(_beq, 0b1100011, 0b000); + INSN(_bne, 0b1100011, 0b001); + INSN(bge, 0b1100011, 0b101); + INSN(bgeu, 0b1100011, 0b111); + INSN(blt, 0b1100011, 0b100); + INSN(bltu, 0b1100011, 
0b110); + +#undef INSN + +#define INSN(NAME) \ void NAME(Register Rs1, Register Rs2, const address dest) { \ assert_cond(dest != NULL); \ int64_t offset = (dest - pc()); \ @@ -593,12 +624,12 @@ class Assembler : public AbstractAssembler { NAME(Rs1, Rs2, dest); \ } - INSN(beq, 0b1100011, 0b000); - INSN(bge, 0b1100011, 0b101); - INSN(bgeu, 0b1100011, 0b111); - INSN(blt, 0b1100011, 0b100); - INSN(bltu, 0b1100011, 0b110); - INSN(bne, 0b1100011, 0b001); + INSN(beq); + INSN(bne); + INSN(bge); + INSN(bgeu); + INSN(blt); + INSN(bltu); #undef INSN @@ -618,8 +649,8 @@ class Assembler : public AbstractAssembler { #define INSN(NAME, REGISTER, op, funct3) \ void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ uint32_t low = val & 0x1f; \ uint32_t high = (val >> 5) & 0x7f; \ @@ -631,16 +662,27 @@ class Assembler : public AbstractAssembler { patch((address)&insn, 31, 25, high); \ emit(insn); \ } \ + + INSN(sb, Register, 0b0100011, 0b000); + INSN(sh, Register, 0b0100011, 0b001); + INSN(_sw, Register, 0b0100011, 0b010); + INSN(_sd, Register, 0b0100011, 0b011); + INSN(fsw, FloatRegister, 0b0100111, 0b010); + INSN(_fsd, FloatRegister, 0b0100111, 0b011); + +#undef INSN + +#define INSN(NAME, REGISTER) \ INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ NAME(Rs, dest, temp); \ } - INSN(sb, Register, 0b0100011, 0b000); - INSN(sh, Register, 0b0100011, 0b001); - INSN(sw, Register, 0b0100011, 0b010); - INSN(sd, Register, 0b0100011, 0b011); - INSN(fsw, FloatRegister, 0b0100111, 0b010); - INSN(fsd, FloatRegister, 0b0100111, 0b011); + INSN(sb, Register); + INSN(sh, Register); + INSN(sw, Register); + INSN(sd, Register); + INSN(fsw, FloatRegister); + INSN(fsd, FloatRegister); #undef INSN @@ -659,14 +701,14 @@ class Assembler : public AbstractAssembler { } \ } \ void NAME(Register Rs, 
const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ assert_different_registers(Rs, temp); \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rs, adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rs, adr.base(), adr.offset()); \ } else { \ @@ -703,13 +745,13 @@ class Assembler : public AbstractAssembler { } \ } \ void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rs, adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rs, adr.base(), adr.offset()); \ } else { \ @@ -769,8 +811,8 @@ class Assembler : public AbstractAssembler { #define INSN(NAME, op) \ void NAME(Register Rd, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -778,7 +820,13 @@ class Assembler : public AbstractAssembler { patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ emit(insn); \ - } \ + } + + INSN(_jal, 0b1101111); + +#undef INSN + +#define INSN(NAME) \ void NAME(Register Rd, const address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ int64_t offset = dest - pc(); \ @@ -796,7 +844,7 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, temp, &Assembler::NAME); \ } - INSN(jal, 0b1101111); + INSN(jal); #undef INSN @@ -804,8 +852,8 @@ class Assembler : public AbstractAssembler { #define INSN(NAME, op, funct) \ void 
NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 14, 12, funct); \ @@ -815,7 +863,7 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(jalr, 0b1100111, 0b000); + INSN(_jalr, 0b1100111, 0b000); #undef INSN @@ -851,7 +899,8 @@ class Assembler : public AbstractAssembler { INSN(fence_i, 0b0001111, 0b001, 0b000000000000); INSN(ecall, 0b1110011, 0b000, 0b000000000000); - INSN(ebreak, 0b1110011, 0b000, 0b000000000001); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + #undef INSN enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -959,12 +1008,12 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(addi, 0b0010011, 0b000); - INSN(slti, 0b0010011, 0b010); - INSN(addiw, 0b0011011, 0b000); - INSN(and_imm12, 0b0010011, 0b111); - INSN(ori, 0b0010011, 0b110); - INSN(xori, 0b0010011, 0b100); + INSN(_addi, 0b0010011, 0b000); + INSN(slti, 0b0010011, 0b010); + INSN(_addiw, 0b0011011, 0b000); + INSN(_and_imm12, 0b0010011, 0b111); + INSN(ori, 0b0010011, 0b110); + INSN(xori, 0b0010011, 0b100); #undef INSN @@ -998,9 +1047,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(slli, 0b0010011, 0b001, 0b000000); - INSN(srai, 0b0010011, 0b101, 0b010000); - INSN(srli, 0b0010011, 0b101, 0b000000); + INSN(_slli, 0b0010011, 0b001, 0b000000); + INSN(_srai, 0b0010011, 0b101, 0b010000); + INSN(_srli, 0b0010011, 0b101, 0b000000); #undef INSN @@ -1036,7 +1085,7 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(lui, 0b0110111); + INSN(_lui, 0b0110111); INSN(auipc, 0b0010111); #undef INSN @@ -1223,6 +1272,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN +// ========================== +// RISC-V Vector Extension +// 
========================== enum SEW { e8 = 0b000, e16 = 0b001, @@ -1265,7 +1317,7 @@ static Assembler::SEW elemtype_to_sew(BasicType etype) { #define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \ if (vill == 1) { \ - guarantee((vlmul | vsew | vsew | vta | vma == 0), \ + guarantee((vlmul | vsew | vta | vma) == 0, \ "the other bits in vtype shall be zero"); \ } \ patch((address)&insn, lsb + 2, lsb, vlmul); \ @@ -1328,6 +1380,23 @@ static Assembler::SEW elemtype_to_sew(BasicType etype) { #undef patch_vtype +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + // Vector Configuration Instruction + INSN(vsetvl, 0b1010111, 0b111, 0b1000000); + +#undef INSN + enum VectorMask { v0_t = 0b0, unmasked = 0b1 @@ -1443,25 +1512,6 @@ enum VectorMask { INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010); INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010); - // Vector Widening Floating-Point/Integer Type-Convert Instructions - INSN(vfwcvt_xu_f_v, 0b1010111, 0b001, 0b01000, 0b010010); - INSN(vfwcvt_x_f_v, 0b1010111, 0b001, 0b01001, 0b010010); - INSN(vfwcvt_f_xu_v, 0b1010111, 0b001, 0b01010, 0b010010); - INSN(vfwcvt_f_x_v, 0b1010111, 0b001, 0b01011, 0b010010); - INSN(vfwcvt_f_f_v, 0b1010111, 0b001, 0b01100, 0b010010); - INSN(vfwcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b01110, 0b010010); - INSN(vfwcvt_rtz_x_f_v, 0b1010111, 0b001, 0b01111, 0b010010); - - // Vector Narrowing Floating-Point/Integer Type-Convert Instructions - INSN(vfncvt_xu_f_w, 0b1010111, 0b001, 0b10000, 0b010010); - INSN(vfncvt_x_f_w, 0b1010111, 0b001, 0b10001, 0b010010); - INSN(vfncvt_f_xu_w, 0b1010111, 0b001, 0b10010, 0b010010); - INSN(vfncvt_f_x_w, 0b1010111, 0b001,
0b10011, 0b010010); - INSN(vfncvt_f_f_w, 0b1010111, 0b001, 0b10100, 0b010010); - INSN(vfncvt_rod_f_f_w, 0b1010111, 0b001, 0b10101, 0b010010); - INSN(vfncvt_rtz_xu_f_w, 0b1010111, 0b001, 0b10110, 0b010010); - INSN(vfncvt_rtz_x_f_w, 0b1010111, 0b001, 0b10111, 0b010010); - // Vector Floating-Point Instruction INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011); INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011); @@ -2251,21 +2301,1072 @@ enum Nf { #undef INSN #undef patch_VLdSt +// ==================================== +// RISC-V Bit-Manipulation Extension +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(add_uw, 0b0111011, 0b000, 0b0000100); + INSN(rol, 0b0110011, 0b001, 0b0110000); + INSN(rolw, 0b0111011, 0b001, 0b0110000); + INSN(ror, 0b0110011, 0b101, 0b0110000); + INSN(rorw, 0b0111011, 0b101, 0b0110000); + INSN(sh1add, 0b0110011, 0b010, 0b0010000); + INSN(sh2add, 0b0110011, 0b100, 0b0010000); + INSN(sh3add, 0b0110011, 0b110, 0b0010000); + INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000); + INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000); + INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000); + INSN(andn, 0b0110011, 0b111, 0b0100000); + INSN(orn, 0b0110011, 0b110, 0b0100000); + INSN(xnor, 0b0110011, 0b100, 0b0100000); + INSN(max, 0b0110011, 0b110, 0b0000101); + INSN(maxu, 0b0110011, 0b111, 0b0000101); + INSN(min, 0b0110011, 0b100, 0b0000101); + INSN(minu, 0b0110011, 0b101, 0b0000101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct12) \ + void NAME(Register Rd, Register Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + 
patch((address)&insn, 31, 20, funct12); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rev8, 0b0010011, 0b101, 0b011010111000); + INSN(sext_b, 0b0010011, 0b001, 0b011000000100); + INSN(sext_h, 0b0010011, 0b001, 0b011000000101); + INSN(zext_h, 0b0111011, 0b100, 0b000010000000); + INSN(clz, 0b0010011, 0b001, 0b011000000000); + INSN(clzw, 0b0011011, 0b001, 0b011000000000); + INSN(ctz, 0b0010011, 0b001, 0b011000000001); + INSN(ctzw, 0b0011011, 0b001, 0b011000000001); + INSN(cpop, 0b0010011, 0b001, 0b011000000010); + INSN(cpopw, 0b0011011, 0b001, 0b011000000010); + INSN(orc_b, 0b0010011, 0b101, 0b001010000111); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 25, 20, shamt); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 24, 20, shamt); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); + +#undef INSN + +// ======================================== +// RISC-V Compressed Instructions Extension +// ======================================== +// Note: +// 1. 
When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be +// transformed to 16-bit instructions if compressible. +// 2. RVC instructions in Assembler always begin with the 'c_' prefix, e.g. 'c_li', +// but most of the time we have no need to explicitly use these instructions. +// 3. 'CompressibleRegion' is introduced to hint that instructions within this region's RAII scope +// are eligible to be compressed into their 2-byte versions. +// An example: +// +// CompressibleRegion cr(_masm); +// __ andr(...); // this instruction may be emitted as c.and if it is compressible +// +// 4. Using -XX:PrintAssemblyOptions=no-aliases helps distinguish RVC instructions from +// normal ones. +// + +private: + bool _in_compressible_region; +public: + bool in_compressible_region() const { return _in_compressible_region; } + void set_in_compressible_region(bool b) { _in_compressible_region = b; } +public: + + // a compressible region + class CompressibleRegion : public StackObj { + protected: + Assembler *_masm; + bool _saved_in_compressible_region; + public: + CompressibleRegion(Assembler *_masm) + : _masm(_masm) + , _saved_in_compressible_region(_masm->in_compressible_region()) { + _masm->set_in_compressible_region(true); + } + ~CompressibleRegion() { + _masm->set_in_compressible_region(_saved_in_compressible_region); + } + }; + + // patch a 16-bit instruction.
+ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 15); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + uint16_t mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + uint16_t target = *(uint16_t *)a; + target &= ~mask; + target |= val; + *(uint16_t *)a = target; + } + + static void c_patch(address a, unsigned bit, uint16_t val) { + c_patch(a, bit, bit, val); + } + + // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + +// -------------- RVC Instruction Definitions -------------- + + void c_nop() { + c_addi(x0, 0); + } + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi, 0b000, 0b01); + INSN(c_addiw, 0b001, 0b01); + 
+#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 10, 0)); \ + assert_cond((imm & 0b1111) == 0); \ + assert_cond(imm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ + c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ + c_patch_reg((address)&insn, 7, sp); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi16sp, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(uimm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ + c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi4spn, 0b000, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_slli, 0b000, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, funct2, 
op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_srli, 0b100, 0b00, 0b01); + INSN(c_srai, 0b100, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_andi, 0b100, 0b10, 0b01); + +#undef INSN + +#define INSN(NAME, funct6, funct2, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 6, 5, funct2); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 15, 10, funct6); \ + emit_int16(insn); \ + } + + INSN(c_sub, 0b100011, 0b00, 0b01); + INSN(c_xor, 0b100011, 0b01, 0b01); + INSN(c_or, 0b100011, 0b10, 0b01); + INSN(c_and, 0b100011, 0b11, 0b01); + INSN(c_subw, 0b100111, 0b00, 0b01); + INSN(c_addw, 0b100111, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch_reg((address)&insn, 7, 
Rd_Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_mv, 0b1000, 0b10); + INSN(c_add, 0b1001, 0b10); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rs1) { \ + assert_cond(Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, x0); \ + c_patch_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_jr, 0b1000, 0b10); + INSN(c_jalr, 0b1001, 0b10); + +#undef INSN + + typedef void (Assembler::* j_c_insn)(address dest); + typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); + + void wrap_label(Label &L, j_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } + } + + void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } + } + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t offset) { \ + assert_cond(is_imm_in_range(offset, 11, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ + c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ + c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ + c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ + c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ + c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 11, 1)); \ + 
c_j(distance); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } + + INSN(c_j, 0b101, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 8, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ + c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(Register Rs1, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 8, 1)); \ + NAME(Rs1, distance); \ + } \ + void NAME(Register Rs1, Label &L) { \ + wrap_label(L, Rs1, &Assembler::NAME); \ + } + + INSN(c_beqz, 0b110, 0b01); + INSN(c_bnez, 0b111, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 18, 0)); \ + assert_cond((imm & 0xfff) == 0); \ + assert_cond(imm != 0); \ + assert_cond(Rd != x0 && Rd != x2); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lui, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, 
Rd); \ + c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_li, 0b010, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ldsp, 0b011, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(FloatRegister Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_fldsp, 0b001, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ld, 
0b011, 0b00, Register); + INSN(c_sd, 0b111, 0b00, Register); + INSN(c_fld, 0b001, 0b00, FloatRegister); + INSN(c_fsd, 0b101, 0b00, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_sdsp, 0b111, 0b10, Register); + INSN(c_fsdsp, 0b101, 0b10, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_swsp, 0b110, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lwsp, 0b010, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { 
\ + assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lw, 0b010, 0b00); + INSN(c_sw, 0b110, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME() { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 11, 2, 0x0); \ + c_patch((address)&insn, 12, 12, 0b1); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ebreak, 0b100, 0b10); + +#undef INSN + +// -------------- RVC Transformation Functions -------------- + +// -------------------------- +// Register instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* add -> c.add */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + c_add(Rd, src); \ + return; \ + } \ + } \ + _add(Rd, Rs1, Rs2); \ + } + + INSN(add); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* sub/subw -> c.sub/c.subw */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ + C_NAME(Rd, Rs2); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(sub, c_sub, _sub); + INSN(subw, c_subw, _subw); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* and/or/xor/addw -> 
c.and/c.or/c.xor/c.addw */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ + ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + C_NAME(Rd, src); \ + return; \ + } \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(andr, c_and, _andr); + INSN(orr, c_or, _orr); + INSN(xorr, c_xor, _xorr); + INSN(addw, c_addw, _addw); + +#undef INSN + +private: +// some helper functions + bool do_compress() const { + return UseRVC && in_compressible_region(); + } + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0 && \ + (!ld || rd_rs2 != x0); \ + } \ + + FUNC(is_c_ldsdsp, 0b111, 9); + FUNC(is_c_lwswsp, 0b011, 8); + +#undef FUNC + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, int32_t imm12) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_fldsdsp, 0b111, 9); + +#undef FUNC + +#define FUNC(NAME, REG_TYPE, funct3, bits) \ + bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ + return rs1->is_compressed_valid() && \ + rd_rs2->is_compressed_valid() && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_ldsd, Register, 0b111, 8); + FUNC(is_c_lwsw, Register, 0b011, 7); + FUNC(is_c_fldsd, FloatRegister, 0b111, 8); + +#undef FUNC + +public: +// -------------------------- +// Load/store register +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* lw -> c.lwsp/c.lw */ \ + if (do_compress()) { \ + if (is_c_lwswsp(Rs, Rd, offset, true)) { \ + c_lwsp(Rd, offset); \ + return; \ + } else if (is_c_lwsw(Rs, Rd, offset)) { \ + c_lw(Rd, Rs, offset); \ + return; \ + } \ + } \ + _lw(Rd, Rs, offset); \ + } + + INSN(lw); + +#undef INSN + +// 
-------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* ld -> c.ldsp/c.ld */ \ + if (do_compress()) { \ + if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ + c_ldsp(Rd, offset); \ + return; \ + } else if (is_c_ldsd(Rs, Rd, offset)) { \ + c_ld(Rd, Rs, offset); \ + return; \ + } \ + } \ + _ld(Rd, Rs, offset); \ + } + + INSN(ld); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ + /* fld -> c.fldsp/c.fld */ \ + if (do_compress()) { \ + if (is_c_fldsdsp(Rs, offset)) { \ + c_fldsp(Rd, offset); \ + return; \ + } else if (is_c_fldsd(Rs, Rd, offset)) { \ + c_fld(Rd, Rs, offset); \ + return; \ + } \ + } \ + _fld(Rd, Rs, offset); \ + } + + INSN(fld); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* sd -> c.sdsp/c.sd */ \ + if (do_compress()) { \ + if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ + c_sdsp(Rd, offset); \ + return; \ + } else if (is_c_ldsd(Rs, Rd, offset)) { \ + c_sd(Rd, Rs, offset); \ + return; \ + } \ + } \ + _sd(Rd, Rs, offset); \ + } + + INSN(sd); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* sw -> c.swsp/c.sw */ \ + if (do_compress()) { \ + if (is_c_lwswsp(Rs, Rd, offset, false)) { \ + c_swsp(Rd, offset); \ + return; \ + } else if (is_c_lwsw(Rs, Rd, offset)) { \ + c_sw(Rd, Rs, offset); \ + return; \ + } \ + } \ + _sw(Rd, Rs, offset); \ + } + + INSN(sw); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ + /* fsd -> c.fsdsp/c.fsd */ \ + if (do_compress()) { \ + if (is_c_fldsdsp(Rs, offset)) { \ + c_fsdsp(Rd, offset); \ + return; \ + } else if (is_c_fldsd(Rs, Rd, offset)) { \ + c_fsd(Rd, Rs, offset); \ + return; \ + } \ + } \ + _fsd(Rd, Rs, offset); \ + } 
+ + INSN(fsd); + +#undef INSN + +// -------------------------- +// Conditional branch instructions +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ + /* beq/bne -> c.beqz/c.bnez */ \ + if (do_compress() && \ + (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ + is_imm_in_range(offset, 8, 1))) { \ + C_NAME(Rs1, offset); \ + return; \ + } \ + NORMAL_NAME(Rs1, Rs2, offset); \ + } + + INSN(beq, c_beqz, _beq); + INSN(bne, c_bnez, _bne); + +#undef INSN + +// -------------------------- +// Unconditional branch instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, const int32_t offset) { \ + /* jal -> c.j */ \ + if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ + c_j(offset); \ + return; \ + } \ + _jal(Rd, offset); \ + } + + INSN(jal); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* jalr -> c.jr/c.jalr */ \ + if (do_compress() && (offset == 0 && Rs != x0)) { \ + if (Rd == x1) { \ + c_jalr(Rs); \ + return; \ + } else if (Rd == x0) { \ + c_jr(Rs); \ + return; \ + } \ + } \ + _jalr(Rd, Rs, offset); \ + } + + INSN(jalr); + +#undef INSN + +// -------------------------- +// Miscellaneous Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME() { \ + /* ebreak -> c.ebreak */ \ + if (do_compress()) { \ + c_ebreak(); \ + return; \ + } \ + _ebreak(); \ + } + + INSN(ebreak); + +#undef INSN + +#define INSN(NAME) \ + void NAME() { \ + /* The illegal instruction in RVC is represented by a 16-bit 0.
*/ \ + if (do_compress()) { \ + emit_int16(0); \ + return; \ + } \ + _halt(); \ + } + + INSN(halt); + +#undef INSN + +// -------------------------- +// Immediate Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, int64_t imm) { \ + /* li -> c.li */ \ + if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ + c_li(Rd, imm); \ + return; \ + } \ + _li(Rd, imm); \ + } + + INSN(li); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ + if (do_compress()) { \ + if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ + c_addi(Rd, imm); \ + return; \ + } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ + c_mv(Rd, Rs1); \ + return; \ + } else if (Rs1 == sp && imm != 0) { \ + if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ + c_addi16sp(imm); \ + return; \ + } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ + c_addi4spn(Rd, imm); \ + return; \ + } \ + } \ + } \ + _addi(Rd, Rs1, imm); \ + } + + INSN(addi); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* addiw -> c.addiw */ \ + if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ + c_addiw(Rd, imm); \ + return; \ + } \ + _addiw(Rd, Rs1, imm); \ + } + + INSN(addiw); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* and_imm12 -> c.andi */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ + c_andi(Rd, imm); \ + return; \ + } \ + _and_imm12(Rd, Rs1, imm); \ + } + + INSN(and_imm12); + +#undef INSN + +// -------------------------- +// Shift Immediate Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, 
Register Rs1, unsigned shamt) { \ + /* slli -> c.slli */ \ + if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ + c_slli(Rd, shamt); \ + return; \ + } \ + _slli(Rd, Rs1, shamt); \ + } + + INSN(slli); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + /* srai/srli -> c.srai/c.srli */ \ + if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ + C_NAME(Rd, shamt); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, shamt); \ + } + + INSN(srai, c_srai, _srai); + INSN(srli, c_srli, _srli); + +#undef INSN + +// -------------------------- +// Upper Immediate Instruction +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, int32_t imm) { \ + /* lui -> c.lui */ \ + if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ + c_lui(Rd, imm); \ + return; \ + } \ + _lui(Rd, imm); \ + } + + INSN(lui); + +#undef INSN + // CSky specific Instruction // load into 2 registers, store 2 registers #define INSN(NAME, op, funct3, funct5) \ void NAME(Register Rd1, Register Rd2, Register Rs, const int32_t offset) { \ - guarantee(offset >= 0 && offset <= 3, "offset is invalid."); \ - unsigned insn = 0; \ + guarantee(offset >= 0 && offset <= 3, "offset is invalid."); \ + unsigned insn = 0; \ int32_t val = offset & 0x3; \ - patch((address)&insn, 6, 0, op); \ - patch_reg((address)&insn, 7, Rd1); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 15, Rs); \ - patch_reg((address)&insn, 20, Rd2); \ - patch((address)&insn, 26, 25, val); \ - patch((address)&insn, 31, 27, funct5); \ - emit(insn); \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd1); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs); \ + patch_reg((address)&insn, 20, Rd2); \ + patch((address)&insn, 26, 25, val); \ + patch((address)&insn, 31, 27, funct5); \ + emit(insn); \ } 
INSN(ldd, 0b0001011, 0b100, 0b11111); INSN(lwd, 0b0001011, 0b100, 0b11100); @@ -2274,6 +3375,8 @@ enum Nf { INSN(swd, 0b0001011, 0b101, 0b11100); #undef INSN +// --------------------------------------------------------------------------------------- + void bgt(Register Rs, Register Rt, const address &dest); void ble(Register Rs, Register Rt, const address &dest); void bgtu(Register Rs, Register Rt, const address &dest); @@ -2299,7 +3402,11 @@ enum Nf { void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); - Assembler(CodeBuffer* code) : AbstractAssembler(code) { + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { } virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, @@ -2316,23 +3423,14 @@ enum Nf { return is_imm_in_range(imm, 12, 0); } - // The maximum range of a branch is fixed for the riscv64 - // architecture. + // The maximum range of a branch is fixed for the RISCV architecture. static const unsigned long branch_range = 1 * M; static bool reachable_from_branch_at(address branch, address target) { return uabs(target - branch) < branch_range; } - static Assembler::SEW elemBytes_to_sew(int esize) { - assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); - return (Assembler::SEW) exact_log2(esize); - } - virtual ~Assembler() {} - -#undef NORMAL -#undef COMPRESSED }; class BiasedLockingCounters; diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp index 0ac92413aae..f60e0e38ae8 100644 --- a/src/hotspot/cpu/riscv/bytes_riscv.hpp +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2016 SAP SE. All rights reserved. 
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,8 +27,6 @@ #ifndef CPU_RISCV_BYTES_RISCV_HPP #define CPU_RISCV_BYTES_RISCV_HPP -#include "memory/allocation.hpp" - class Bytes: AllStatic { public: // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index 96aa18cf6f7..3779514d2fc 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -39,8 +39,7 @@ #define __ ce->masm()-> -void CounterOverflowStub::emit_code(LIR_Assembler* ce) -{ +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); Metadata *m = _method->as_constant_ptr()->as_metadata(); __ mov_metadata(t0, m); @@ -53,21 +52,18 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) - : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -{ + : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -{ + : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } -void RangeCheckStub::emit_code(LIR_Assembler* ce) -{ +void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); if (_info->deoptimize_on_exception()) { address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -92,20 +88,18 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) stub_id = Runtime1::throw_range_check_failed_id; } int32_t off = 0; - __ la_patchable(lr, RuntimeAddress(Runtime1::entry_for(stub_id)), off); - __ jalr(lr, lr, off); + __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off); + __ jalr(ra, ra, off); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); } -PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -{ +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { _info = new CodeEmitInfo(info); } -void PredicateFailedStub::emit_code(LIR_Assembler* ce) -{ +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); address a = 
Runtime1::entry_for(Runtime1::predicate_failed_trap_id); __ far_call(RuntimeAddress(a)); @@ -114,8 +108,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) debug_only(__ should_not_reach_here()); } -void DivByZeroStub::emit_code(LIR_Assembler* ce) -{ +void DivByZeroStub::emit_code(LIR_Assembler* ce) { if (_offset != -1) { ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); } @@ -129,21 +122,19 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) } // Implementation of NewInstanceStub -NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -{ +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { _result = result; _klass = klass; _klass_reg = klass_reg; _info = new CodeEmitInfo(info); - assert(stub_id == Runtime1::new_instance_id || - stub_id == Runtime1::fast_new_instance_id || + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || stub_id == Runtime1::fast_new_instance_init_check_id, "need new_instance id"); _stub_id = stub_id; } -void NewInstanceStub::emit_code(LIR_Assembler* ce) -{ +void NewInstanceStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); __ mv(x13, _klass_reg->as_register()); @@ -155,16 +146,14 @@ void NewInstanceStub::emit_code(LIR_Assembler* ce) } // Implementation of NewTypeArrayStub -NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -{ +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { _klass_reg = klass_reg; _length = length; _result = result; _info = new CodeEmitInfo(info); } -void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -{ +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be 
fixed"); __ bind(_entry); assert(_length->as_register() == x9, "length must in x9"); @@ -177,16 +166,14 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) } // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -{ +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); } -void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -{ +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); assert(_length->as_register() == x9, "length must in x9"); @@ -200,13 +187,11 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) // Implementation of MonitorAccessStubs MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -: MonitorAccessStub(obj_reg, lock_reg) -{ +: MonitorAccessStub(obj_reg, lock_reg) { _info = new CodeEmitInfo(info); } -void MonitorEnterStub::emit_code(LIR_Assembler* ce) -{ +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); ce->store_parameter(_obj_reg->as_register(), 1); @@ -223,8 +208,7 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) __ j(_continuation); } -void MonitorExitStub::emit_code(LIR_Assembler* ce) -{ +void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); if (_compute_lock) { // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -238,7 +222,7 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) } else { exit_id = Runtime1::monitorexit_nofpu_id; } - __ la(lr, _continuation); + __ la(ra, _continuation); __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); } @@ -247,18 +231,23 @@ void LoadKlassStub::emit_code(LIR_Assembler* ce) { Unimplemented(); } +// Implementation of 
patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (especially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; void PatchingStub::align_patch_site(MacroAssembler* masm) {} -// RISCV64 don't use C1 runtime patching. When need patch, just deoptimize. -void PatchingStub::emit_code(LIR_Assembler* ce) -{ - assert(false, "RISCV64 should not use C1 runtime patching"); +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); } -void DeoptimizeStub::emit_code(LIR_Assembler* ce) -{ +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); ce->store_parameter(_trap_request, 0); __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -266,8 +255,7 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) DEBUG_ONLY(__ should_not_reach_here()); } -void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -{ +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { address a = NULL; if (_info->deoptimize_on_exception()) { // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
@@ -284,8 +272,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) debug_only(__ should_not_reach_here()); } -void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -{ +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); @@ -299,14 +286,12 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) debug_only(__ should_not_reach_here()); } -void ArrayCopyStub::emit_code(LIR_Assembler* ce) -{ +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { // ---------------slow case: call to native----------------- __ bind(_entry); // Figure out where the args should go // This should really convert the IntrinsicID to the Method* and signature // but I don't know how to do that. - // const int args_num = 5; VMRegPair args[args_num]; BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; @@ -314,12 +299,11 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) // push parameters Register r[args_num]; - int i = 0; - r[i++] = src()->as_register(); - r[i++] = src_pos()->as_register(); - r[i++] = dst()->as_register(); - r[i++] = dst_pos()->as_register(); - r[i++] = length()->as_register(); + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); // next registers will get stored on the stack for (int j = 0; j < args_num; j++) { @@ -328,7 +312,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) int st_off = r_1->reg2stack() * wordSize; __ sd(r[j], Address(sp, st_off)); } else { - assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); + assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); } } @@ -348,8 +332,10 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) ce->add_call_info_here(info()); #ifndef PRODUCT - __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); - __ add_memory_int32(Address(t1), 1); + if 
(PrintC1Statistics) { + __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ add_memory_int32(Address(t1), 1); + } #endif __ j(_continuation); diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp index 550d6be22e1..4417ad63091 100644 --- a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -77,7 +76,7 @@ enum { // Encoding of float value in debug info. This is true on x86 where // floats are extended to doubles when stored in the stack, false for -// RISCV64 where floats and doubles are stored in their native form. +// RISCV where floats and doubles are stored in their native form. enum { pd_float_saved_as_double = false }; diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp index 657cf025d18..e3a2606c532 100644 --- a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,4 +27,4 @@ // FpuStackSim //-------------------------------------------------------- -// No FPU stack on RISCV64 +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp index 5686156edcc..7bc3d311501 100644 --- a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +26,7 @@ #ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP #define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP -// No FPU stack on RISCV64 +// No FPU stack on RISCV class FpuStackSim; #endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp index e7de3f39f2d..682ebe82627 100644 --- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -30,8 +29,7 @@ #include "runtime/sharedRuntime.hpp" #include "vmreg_riscv.inline.hpp" -LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -{ +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { LIR_Opr opr = LIR_OprFact::illegalOpr; VMReg r_1 = reg->first(); VMReg r_2 = reg->second(); @@ -231,7 +229,7 @@ void FrameMap::initialize() { // special register map_register(i, x0); zr_opr = LIR_OprFact::single_cpu(i); i++; // zr - map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // lr + map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // ra map_register(i, x2); r2_opr = LIR_OprFact::single_cpu(i); i++; // sp map_register(i, x3); r3_opr = LIR_OprFact::single_cpu(i); i++; // gp map_register(i, x4); r4_opr = LIR_OprFact::single_cpu(i); i++; // thread @@ -331,7 +329,7 @@ Address FrameMap::make_new_address(ByteSize sp_offset) const { // ----------------mapping----------------------- -// all mapping is based on rfp addressing, except for simple leaf methods where we access +// all mapping is based on fp addressing, except for simple leaf methods where we access // the locals sp based (and no frame is built) @@ -352,7 +350,7 @@ Address FrameMap::make_new_address(ByteSize sp_offset) const { // +----------+ // | ret addr | // +----------+ -// | args | <- RFP +// | args | <- FP // | .........| @@ -376,14 +374,13 @@ VMReg FrameMap::fpu_regname (int n) { return as_FloatRegister(n)->as_VMReg(); } -LIR_Opr FrameMap::stack_pointer() -{ +LIR_Opr FrameMap::stack_pointer() { return FrameMap::sp_opr; } // JSR 292 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { - return LIR_OprFact::illegalOpr; // Not needed on riscv64 + return LIR_OprFact::illegalOpr; // Not needed on riscv } bool FrameMap::validate_frame() { diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp index b1ff1afb660..01281f5c9e1 100644 
--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +26,7 @@ #ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP #define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP -// On RISCV64 the frame looks as follows: +// On RISCV the frame looks as follows: // // +-----------------------------+---------+----------------------------------------+----------------+----------- // | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp index 5c1ca282e04..2a99d49c94b 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -65,8 +64,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t1, t1, c - 1); } else { - __ slli(t1, t1, registerSize - shift); - __ srli(t1, t1, registerSize - shift); + __ zero_extend(t1, t1, shift); } __ subw(dreg, t1, t0); } @@ -80,8 +78,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t0, t0, c - 1); } else { - __ slli(t0, t0, registerSize - shift); - __ srli(t0, t0, registerSize - shift); + __ zero_extend(t0, t0, shift); } __ addw(dreg, t0, lreg); __ sraiw(dreg, dreg, shift); @@ -193,39 +190,37 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); break; case lir_div: - assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); if (c == 1) { // move lreg_lo to dreg if divisor is 1 __ mv(dreg, lreg_lo); } else { - unsigned int shift = exact_log2(c); + unsigned int shift = exact_log2_long(c); // use t0 as intermediate result register __ srai(t0, lreg_lo, 0x3f); if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t0, t0, c - 1); } else { - __ slli(t0, t0, registerSize - shift); - __ srli(t0, t0, registerSize - shift); + __ zero_extend(t0, t0, shift); } __ add(dreg, t0, lreg_lo); __ srai(dreg, dreg, shift); } break; case lir_rem: - assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); if (c == 1) { // move 0 to dreg if divisor is 1 __ mv(dreg, zr); } else { - unsigned int shift = exact_log2(c); + unsigned int shift = exact_log2_long(c); __ srai(t0, lreg_lo, 0x3f); __ srli(t0, t0, BitsPerLong - shift); __ add(t1, lreg_lo, t0); if (is_imm_in_range(c 
- 1, 12, 0)) { __ andi(t1, t1, c - 1); } else { - __ slli(t1, t1, registerSize - shift); - __ srli(t1, t1, registerSize - shift); + __ zero_extend(t1, t1, shift); } __ sub(dreg, t1, t0); } @@ -243,9 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig switch (code) { case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_mul_strictfp: // fall through + case lir_mul_strictfp: // fall through case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_div_strictfp: // fall through + case lir_div_strictfp: // fall through case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; default: ShouldNotReachHere(); @@ -258,9 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig switch (code) { case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_mul_strictfp: // fall through + case lir_mul_strictfp: // fall through case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_div_strictfp: // fall through + case lir_div_strictfp: // fall through case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; default: ShouldNotReachHere(); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp index b7a2cbf2912..ab0a9963fc1 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -23,6 +22,7 @@ * questions. * */ + #ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP #define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP @@ -32,4 +32,6 @@ void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); + #endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp index 7502fc2d161..b7f53e395f3 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -350,12 +349,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length, Register dst, Register dst_pos, BasicType basic_type) { int scale = array_element_size(basic_type); - __ slli(t0, src_pos, scale); - __ add(c_rarg0, src, t0); + __ shadd(c_rarg0, src_pos, src, t0, scale); __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); assert_different_registers(c_rarg0, dst, dst_pos, length); - __ slli(t0, dst_pos, scale); - __ add(c_rarg1, dst, t0); + __ shadd(c_rarg1, dst_pos, dst, t0, scale); __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); assert_different_registers(c_rarg1, dst, length); __ mv(c_rarg2, length); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp index 1b9990e20c7..06a0f248ca6 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -26,6 +25,7 @@ #ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP #define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP + // arraycopy sub functions void generic_arraycopy(Register src, Register src_pos, Register length, Register dst, Register dst_pos, CodeStub *stub); @@ -48,4 +48,5 @@ Register dst, Register dst_pos); void arraycopy_load_args(Register src, Register src_pos, Register length, Register dst, Register dst_pos); + #endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index ce1dc4fc456..97e87aa85c5 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -36,8 +36,6 @@ #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" #include "code/compiledIC.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.hpp" #include "nativeInst_riscv.hpp" #include "oops/objArrayKlass.hpp" @@ -100,25 +98,11 @@ LIR_Opr LIR_Assembler::osrBufferPointer() { return FrameMap::as_pointer_opr(receiverOpr()->as_register()); } -//--------------fpu register translations----------------------- -void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } - -void LIR_Assembler::reset_FPU() { Unimplemented(); } - -void LIR_Assembler::fpop() { Unimplemented(); } - -void LIR_Assembler::fxch(int i) { Unimplemented(); } - -void LIR_Assembler::fld(int i) { Unimplemented(); } - -void LIR_Assembler::ffree(int i) { Unimplemented(); } - void LIR_Assembler::breakpoint() { Unimplemented(); } void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -//------------------------------------------- static jlong as_long(LIR_Opr data) { jlong result; @@ -136,6 +120,43 @@ static jlong as_long(LIR_Opr data) { return result; } +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ movptr(tmp, addr->disp()); + return Address(tmp, 0); + } + + Register base = addr->base()->as_pointer_register(); + LIR_Opr index_opr = addr->index(); + + if (index_opr->is_illegal()) { + return Address(base, addr->disp()); + } + + int scale = addr->scale(); + if (index_opr->is_cpu_register()) { + Register index; + if (index_opr->is_single_cpu()) { + index = index_opr->as_register(); + } else { + index = index_opr->as_register_lo(); + } + if (scale != 0) { + __ shadd(tmp, index, base, tmp, scale); + } else { + __ add(tmp, base, index); + } + return Address(tmp, 
addr->disp()); + } else if (index_opr->is_constant()) { + intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); + return Address(base, addr_offset); + } + + Unimplemented(); + return Address(); +} + Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ShouldNotReachHere(); return Address(); @@ -640,8 +661,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po } } -void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool pop_fpu_stack, bool wide, bool /* unaligned */) { +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { LIR_Address* to_addr = dest->as_address_ptr(); // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src Register compressed_src = t1; @@ -763,8 +783,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { reg2stack(temp, dest, dest->type(), false); } -void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool wide, bool /* unaligned */) { +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { assert(src->is_address(), "should not call otherwise"); assert(dest->is_register(), "should not call otherwise"); @@ -809,6 +828,9 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch __ ld(dest->as_register(), as_Address(from_addr)); break; case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. 
if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { __ lwu(dest->as_register(), as_Address(from_addr)); } else { @@ -960,13 +982,13 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { case Bytecodes::_d2f: __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break; case Bytecodes::_i2c: - __ zero_ext(dest->as_register(), src->as_register(), registerSize - 16); break; // 16: char size + __ zero_extend(dest->as_register(), src->as_register(), 16); break; case Bytecodes::_i2l: __ addw(dest->as_register_lo(), src->as_register(), zr); break; case Bytecodes::_i2s: - __ sign_ext(dest->as_register(), src->as_register(), registerSize - 16); break; // 16: short size + __ sign_extend(dest->as_register(), src->as_register(), 16); break; case Bytecodes::_i2b: - __ sign_ext(dest->as_register(), src->as_register(), registerSize - 8); break; // 8: byte size + __ sign_extend(dest->as_register(), src->as_register(), 8); break; case Bytecodes::_l2i: _masm->block_comment("FIXME: This coulde be no-op"); __ addw(dest->as_register(), src->as_register_lo(), zr); break; @@ -1331,7 +1353,12 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } } -void LIR_Assembler::align_call(LIR_Code code) { } +void LIR_Assembler::align_call(LIR_Code code) { + // With RVC a call instruction may get 2-byte aligned. + // The address of the call instruction needs to be 4-byte aligned to + // ensure that it does not span a cache line so that it can be patched. 
+ __ align(4); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -1351,10 +1378,14 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { add_call_info(code_offset(), op->info()); } -void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_static_call_stub() { address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); address stub = __ start_a_stub(call_stub_size()); if (stub == NULL) { bailout("static call stub overflow"); @@ -1366,7 +1397,8 @@ void LIR_Assembler::emit_static_call_stub() { __ relocate(static_stub_Relocation::spec(call_pc)); __ emit_static_call_stub(); - assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); + assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() + <= call_stub_size(), "stub too big"); __ end_a_stub(); } @@ -1668,8 +1700,7 @@ void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_kla } #endif // first time here. Set profile type. - // TODO: Fix this typo. See JDK-8267625. 
- __ ld(tmp, mdo_addr); + __ sd(tmp, mdo_addr); } else { assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); @@ -1774,30 +1805,33 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { - if (patch_code != lir_patch_none) { +#if INCLUDE_SHENANDOAHGC + if (UseShenandoahGC && patch_code != lir_patch_none) { deoptimize_trap(info); return; } +#endif + assert(patch_code == lir_patch_none, "Patch code not supported"); LIR_Address* adr = addr->as_address_ptr(); Register dst = dest->as_register_lo(); assert_different_registers(dst, t0); - if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { - + if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { + int scale = adr->scale(); intptr_t offset = adr->disp(); LIR_Opr index_op = adr->index(); - int scale = adr->scale(); - if(index_op->is_constant()) { + if (index_op->is_constant()) { offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; } - if(!is_imm_in_range(offset, 12, 0)) { + if (!is_imm_in_range(offset, 12, 0)) { __ la(t0, as_Address(adr)); __ mv(dst, t0); return; } } + __ la(dst, as_Address(adr)); } @@ -1817,13 +1851,11 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg if (info != NULL) { add_call_info_here(info); } - __ ifence(); } void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { if (dest->is_address() || src->is_address()) { - move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, - /* unaligned */ false, /* wide */ false); + move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); } else { ShouldNotReachHere(); } @@ 
-1950,42 +1982,6 @@ int LIR_Assembler::array_element_size(BasicType type) const { return exact_log2(elem_size); } -Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { - if (addr->base()->is_illegal()) { - assert(addr->index()->is_illegal(), "must be illegal too"); - __ movptr(tmp, addr->disp()); - return Address(tmp, 0); - } - - Register base = addr->base()->as_pointer_register(); - LIR_Opr index_op = addr->index(); - int scale = addr->scale(); - - if (index_op->is_illegal()) { - return Address(base, addr->disp()); - } else if (index_op->is_cpu_register()) { - Register index; - if (index_op->is_single_cpu()) { - index = index_op->as_register(); - } else { - index = index_op->as_register_lo(); - } - if (scale != 0) { - __ slli(tmp, index, scale); - __ add(tmp, base, tmp); - } else { - __ add(tmp, base, index); - } - return Address(tmp, addr->disp()); - } else if (index_op->is_constant()) { - intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); - return Address(base, addr_offset); - } - - Unimplemented(); - return Address(); -} - // helper functions which checks for overflow and sets bailout if it // occurs. Always returns a valid embeddable pointer but in the // bailout case the pointer won't be to unique storage. 
@@ -2019,6 +2015,18 @@ address LIR_Assembler::int_constant(jlong n) { } } +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, Assembler::rl /* release */, t0, true /* result as bool */); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp index a8d58d7c7a2..5c81f1c704c 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -68,17 +68,16 @@ friend class ArrayCopyStub; void deoptimize_trap(CodeEmitInfo *info); - enum - { - // see emit_static_call_stub for detail: + enum { + // See emit_static_call_stub for detail // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) _call_stub_size = 14 * NativeInstruction::instruction_size + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), _call_aot_stub_size = 0, - // see emit_exception_handler for detail: + // See emit_exception_handler for detail // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller - // see emit_deopt_handler for detail + // See emit_deopt_handler for detail // auipc (1) + far_jump (6 or 2) _deopt_handler_size = 1 * NativeInstruction::instruction_size + 6 * NativeInstruction::instruction_size // or smaller diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp index 58aeec448b8..c41819fc2ae 100644 --- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -142,7 +142,6 @@ bool LIRGenerator::can_inline_as_constant(Value v) const { return false; } - bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { if (c->as_constant() != NULL) { long constant = 0; @@ -158,7 +157,6 @@ bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } - LIR_Opr LIRGenerator::safepoint_poll_register() { return LIR_OprFact::illegalOpr; } @@ -166,6 +164,7 @@ LIR_Opr LIRGenerator::safepoint_poll_register() { LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, int shift, int disp, BasicType type) { assert(base->is_register(), "must be"); + if (index->is_constant()) { LIR_Const *constant = index->as_constant_ptr(); jlong c; @@ -182,9 +181,9 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, __ move(index, tmp); return new LIR_Address(base, tmp, type); } - } else { - return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); } + + return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); } LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, @@ -192,28 +191,23 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); int elem_size = type2aelembytes(type); int shift = exact_log2(elem_size); - - LIR_Address* addr = NULL; - if (index_opr->is_constant()) { - addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); - } else { - if (index_opr->type() == T_INT) { - LIR_Opr tmp = new_register(T_LONG); - __ convert(Bytecodes::_i2l, index_opr, tmp); - index_opr = tmp; - } - addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); - } - return addr; + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); } LIR_Opr LIRGenerator::load_immediate(int x, 
BasicType type) { + LIR_Opr r; switch (type) { - case T_LONG: return LIR_OprFact::longConst(x); - case T_INT: return LIR_OprFact::intConst(x); - default: ShouldNotReachHere(); + case T_LONG: + r = LIR_OprFact::longConst(x); + break; + case T_INT: + r = LIR_OprFact::intConst(x); + break; + default: + ShouldNotReachHere(); + r = NULL; } - return NULL; + return r; } void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -283,10 +277,10 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // "lock" stores the address of the monitor stack slot, so this is not an oop LIR_Opr lock = new_register(T_INT); - // Need a tmp register for biased locking - LIR_Opr tmp = LIR_OprFact::illegalOpr; + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; if (UseBiasedLocking) { - tmp = new_register(T_INT); + scratch = new_register(T_INT); } CodeEmitInfo* info_for_exception = NULL; @@ -296,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, syncTempOpr(), tmp, + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, x->monitor_no(), info_for_exception, info); } @@ -380,7 +374,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { // missing test if instr is commutative and if we should swap - LIRItem left(x->x(), this); + LIRItem left(x->x(), this); LIRItem right(x->y(), this); if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -393,7 +387,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { // no need to do div-by-zero check if the divisor is a non-zero constant if (c != 0) { need_zero_check = false; } // do not load right if the divisor is a power-of-2 constant - if (c > 0 && 
is_power_of_2(c)) { + if (c > 0 && is_power_of_2_long(c)) { right.dont_load_item(); } else { right.load_item(); @@ -404,7 +398,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { if (need_zero_check) { CodeEmitInfo* info = state_for(x); __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); - __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); } rlock_result(x); @@ -561,7 +555,7 @@ void LIRGenerator::do_LogicOp(LogicOp* x) { left.load_item(); rlock_result(x); ValueTag tag = right.type()->tag(); - if(right.is_constant() && + if (right.is_constant() && ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { right.dont_load_item(); @@ -663,14 +657,22 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { value.load_item(); LIR_Opr dst = rlock_result(x); - if (x->id() == vmIntrinsics::_dsqrt) { - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); - } else { // vmIntrinsics::_dabs - __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + default: + ShouldNotReachHere(); } break; } - default: ShouldNotReachHere(); + default: + ShouldNotReachHere(); } } @@ -1088,9 +1090,5 @@ void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, CodeEmitInfo* info) { - if (!UseBarriersForVolatile) { - __ membar(); - } - __ volatile_load_mem_reg(address, result, info); } diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp index 3d0b6b063c2..78a61128bdd 100644 --- 
a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,5 +29,5 @@ #include "utilities/bitMap.inline.hpp" void LinearScan::allocate_fpu_stack() { - // No FPU stack on RISCV64 + // No FPU stack on RISCV } diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp index bec623f2ec6..d7ca7b0fd05 100644 --- a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,6 @@ inline int LinearScan::num_physical_regs(BasicType type) { return 1; } - inline bool LinearScan::requires_adjacent_regs(BasicType type) { return false; } @@ -58,8 +57,8 @@ inline bool LinearScan::is_caller_save(int assigned_reg) { return false; } - inline void LinearScan::pd_add_temps(LIR_Op* op) { + // No special case behaviours yet } @@ -81,5 +80,4 @@ inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) return false; } - #endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 110118caab3..99d981f97f4 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "gc/shared/collectedHeap.hpp" #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" -#include "oops/markOop.hpp" #include "runtime/basicLock.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/os.hpp" @@ -51,7 +50,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, } } -int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { const int aligned_mask = BytesPerWord - 1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -64,8 +63,8 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); if (UseBiasedLocking) { - assert(tmp != noreg, "should have tmp register at this point"); - null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); } else { null_check_offset = offset(); } @@ -182,7 +181,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register } // preserves obj, destroys len_in_bytes -void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); Label done; @@ -194,7 +193,7 @@ void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int if (hdr_size_in_bytes) { add(obj, 
obj, hdr_size_in_bytes); } - zero_memory(obj, len_in_bytes, tmp1); + zero_memory(obj, len_in_bytes, tmp); if (hdr_size_in_bytes) { sub(obj, obj, hdr_size_in_bytes); } @@ -288,8 +287,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1 const Register arr_size = tmp2; // okay to be the same // align object end mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); - slli(t0, len, f); - add(arr_size, arr_size, t0); + shadd(arr_size, len, arr_size, t0, f); andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case); @@ -320,14 +318,14 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we + // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a J, JAL or NOP. // Make it a NOP. nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); // Make sure there is enough stack space for this method's activation. - // Note that we do this before doing an enter(). - + // Note that we do this before creating a frame. generate_stack_overflow_check(bang_size_in_bytes); MacroAssembler::build_frame(framesize); } @@ -338,21 +336,15 @@ void C1_MacroAssembler::remove_frame(int framesize) { void C1_MacroAssembler::verified_entry() { - // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we - // must ensure that this first instruction is a J, JAL or NOP. - // Make it a NOP. - - nop(); } void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { - // fp + 0: link - // + 1: return address - // + 2: argument with offset 0 - // + 3: argument with offset 1 - // + 4: ... 
- ld(reg, Address(fp, (offset_in_words + 2) * BytesPerWord)); + // fp + -2: link + // + -1: return address + // + 0: argument with offset 0 + // + 1: argument with offset 1 + // + 2: ... + ld(reg, Address(fp, offset_in_words * BytesPerWord)); } #ifndef PRODUCT @@ -431,9 +423,9 @@ void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, L if (type == T_OBJECT || type == T_ARRAY) { assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); if (cmpFlag == lir_cond_equal) { - oop_beq(op1, op2, label, is_far); + beq(op1, op2, label, is_far); } else { - oop_bne(op1, op2, label, is_far); + bne(op1, op2, label, is_far); } } else { assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp index 4e4368bf6c2..1950cee5dd5 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -42,14 +42,14 @@ using MacroAssembler::null_check; void try_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time + int con_size_in_bytes, // object size in bytes if known at compile time Register tmp1, // temp register Register tmp2, // temp register Label& slow_case // continuation point if fast allocation fails ); void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); - void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); void float_cmp(bool is_float, int unordered_result, FloatRegister f0, FloatRegister f1, @@ -59,9 +59,9 @@ using MacroAssembler::null_check; // hdr : must be x10, contents destroyed // obj : must point to the object to lock, contents preserved // disp_hdr: must point to the displaced header location, contents preserved - // tmp : temporary register, contents destroyed + // scratch : scratch register, contents destroyed // returns code offset at which to add null check debug information - int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); // unlocking // hdr : contents destroyed diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp index 8d8db47b71a..329df2e1ca7 100644 --- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. 
All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -83,7 +83,6 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres pop_reg(x10, sp); #endif reset_last_Java_frame(true); - ifence(); // check for pending exceptions { Label L; @@ -227,11 +226,11 @@ const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; // enum reg_save_layout { - reg_save_frame_size = 32 /* float */ + 32 /* integer */ + reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */ }; // Save off registers which might be killed by calls into the runtime. -// Tries to smart of about FP registers. In particular we separate +// Tries to smart of about FPU registers. In particular we separate // saving and describing the FPU registers for deoptimization since we // have to save the FPU registers twice if we describe them. The // deopt blob is the only thing which needs to describe FPU registers. @@ -248,11 +247,12 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { OopMap* oop_map = new OopMap(frame_size_in_slots, 0); assert_cond(oop_map != NULL); - // cpu_regs, caller save registers only, see FrameMap::initialize - // in c1_FrameMap_riscv64.cpp for detail. - const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, - x13, x14, x15, x16, x17, - x28, x29, x30, x31}; + // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. 
+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { + x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 + }; + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { Register r = caller_save_cpu_regs[i]; int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -276,8 +276,8 @@ static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true) { __ block_comment("save_live_registers"); - // if the number of pushed regs is odd, zr will be added - __ push_reg(RegSet::range(x3, x31), sp); // integer registers except ra(x1) & sp(x2) + // if the number of pushed regs is odd, one slot will be reserved for alignment + __ push_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) if (save_fpu_registers) { // float registers @@ -286,7 +286,7 @@ static OopMap* save_live_registers(StubAssembler* sasm, __ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); } } else { - // we define reg_save_layout = 64 as the fixed frame size, + // we define reg_save_layout = 62 as the fixed frame size, // we should also sub 32 * wordSize to sp when save_fpu_registers == false __ addi(sp, sp, -32 * wordSize); } @@ -306,8 +306,8 @@ static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registe __ addi(sp, sp, 32 * wordSize); } - // if the number of popped regs is odd, zr will be added - __ pop_reg(RegSet::range(x3, x31), sp); // integer registers except ra(x1) & sp(x2) + // if the number of popped regs is odd, the reserved slot for alignment will be removed + __ pop_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) } static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) { @@ -322,10 +322,10 @@ static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_ __ addi(sp, sp, 32 * wordSize); } - // if the number of popped regs is odd, zr will be added 
- // integer registers except ra(x1) & sp(x2) & x10 - __ pop_reg(RegSet::range(x3, x9), sp); // pop zr, x3 ~ x9 - __ pop_reg(RegSet::range(x11, x31), sp); // pop x10 ~ x31, x10 will be loaded to zr + // pop integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) & x10 + // there is one reserved slot for alignment on the stack in save_live_registers(). + __ pop_reg(RegSet::range(x5, x9), sp); // pop x5 ~ x9 with the reserved slot for alignment + __ pop_reg(RegSet::range(x11, x31), sp); // pop x11 ~ x31; x10 will be automatically skipped here } void Runtime1::initialize_pd() { @@ -339,11 +339,10 @@ void Runtime1::initialize_pd() { sp_offset += step; } - // we save x0, x3 ~ x31, except x1, x2 - cpu_reg_save_offsets[0] = sp_offset; + // a slot reserved for stack 16-byte alignment, see MacroAssembler::push_reg sp_offset += step; - // 3: loop starts from x3 - for (i = 3; i < FrameMap::nof_cpu_regs; i++) { + // we save x5 ~ x31, except x0 ~ x4: loop starts from x5 + for (i = 5; i < FrameMap::nof_cpu_regs; i++) { cpu_reg_save_offsets[i] = sp_offset; sp_offset += step; } @@ -397,7 +396,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ sd(zr, Address(xthread, Thread::pending_exception_offset())); // load issuing PC (the return address for this stub) into x13 - __ ld(exception_pc, Address(fp, 1 * BytesPerWord)); + __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); // make sure that the vm_results are cleared (may be unnecessary) __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); @@ -410,7 +409,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { break; case handle_exception_from_callee_id: { // At this point all registers except exception oop (x10) and - // exception pc (lr) are dead. + // exception pc (ra) are dead. 
const int frame_size = 2 /* fp, return address */; oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); sasm->set_frame_size(frame_size); @@ -448,7 +447,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset())); // patch throwing pc into return address (has bci & oop map) - __ sd(exception_pc, Address(fp, 1 * BytesPerWord)); + __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); // compute the exception handler. // the exception oop and the throwing pc are read from the fields in JavaThread @@ -464,7 +463,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ invalidate_registers(false, true, true, true, true, true); // patch the return address, this stub will directly return to the exception handler - __ sd(x10, Address(fp, 1 * BytesPerWord)); + __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord)); switch (id) { case forward_exception_id: @@ -478,7 +477,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ leave(); __ ret(); // jump to exception handler break; - default: ShouldNotReachHere(); + default: ShouldNotReachHere(); } return oop_maps; @@ -514,10 +513,10 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // save exception_oop __ addi(sp, sp, -2 * wordSize); __ sd(exception_oop, Address(sp, wordSize)); - __ sd(lr, Address(sp)); + __ sd(ra, Address(sp)); // search the exception handler address of the caller (using the return address) - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, lr); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra); // x10: exception handler address of the caller // Only x10 is valid at this time; all other registers have been @@ -528,11 +527,11 @@ void 
Runtime1::generate_unwind_exception(StubAssembler *sasm) { __ mv(handler_addr, x10); // get throwing pc (= return address). - // lr has been destroyed by the call - __ ld(lr, Address(sp)); + // ra has been destroyed by the call + __ ld(ra, Address(sp)); __ ld(exception_oop, Address(sp, wordSize)); __ addi(sp, sp, 2 * wordSize); - __ mv(x13, lr); + __ mv(x13, ra); __ verify_not_null_oop(exception_oop); @@ -581,17 +580,14 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { #endif __ reset_last_Java_frame(true); - __ ifence(); - // check for pending exceptions - { - Label L; + { Label L; __ ld(t0, Address(xthread, Thread::pending_exception_offset())); __ beqz(t0, L); // exception pending => remove activation and forward to exception handler { Label L1; - __ bnez(x10, L1); // have we deoptimized? + __ bnez(x10, L1); // have we deoptimized? __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); __ bind(L1); } @@ -649,6 +645,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { // Will reexecute. 
Proper return address is already on the stack we just restore // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); __ leave(); __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); @@ -794,8 +791,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { OopMap* map = save_live_registers(sasm); assert_cond(map != NULL); - const int bci_off = 2; - const int method_off = 3; + const int bci_off = 0; + const int method_off = 1; // Retrieve bci __ lw(bci, Address(fp, bci_off * BytesPerWord)); // And a pointer to the Method* @@ -859,8 +856,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ sll(arr_size, length, t0); int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; - __ slli(tmp1, tmp1, registerSize - lh_header_size_msb); - __ srli(tmp1, tmp1, registerSize - lh_header_size_width); + __ slli(tmp1, tmp1, XLEN - lh_header_size_msb); + __ srli(tmp1, tmp1, XLEN - lh_header_size_width); __ add(arr_size, arr_size, tmp1); __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp index f7fcbb13067..9316d4be02e 100644 --- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -58,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); define_pd_global(uintx, MetaspaceSize, 12*M ); define_pd_global(bool, NeverActAsServerClassMachine, true ); -define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(uint64_t, MaxRAM, 1ULL*G); define_pd_global(bool, CICompileOSR, true ); #endif // !TIERED define_pd_global(bool, UseTypeProfile, false); @@ -67,6 +66,6 @@ define_pd_global(bool, RoundFPResults, true ); define_pd_global(bool, LIRFillDelaySlots, false); define_pd_global(bool, OptimizeSinglePrecision, true ); define_pd_global(bool, CSEArrayLength, false); -define_pd_global(bool, TwoOperandLIRForm, false ); +define_pd_global(bool, TwoOperandLIRForm, false); #endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp index 9955fe805a2..3da1f1c6d86 100644 --- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -51,7 +50,7 @@ define_pd_global(intx, FLOATPRESSURE, 32); define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); define_pd_global(intx, INTPRESSURE, 24); -define_pd_global(intx, InteriorEntryAlignment, 4); +define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, LoopUnrollLimit, 60); define_pd_global(intx, LoopPercentProfileLimit, 10); @@ -77,7 +76,7 @@ define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); define_pd_global(intx, ProfiledCodeHeapSize, 22*M); define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinBlockLength, 6); define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); // Heap related flags @@ -86,6 +85,6 @@ define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); // Ergonomics related flags define_pd_global(bool, NeverActAsServerClassMachine, false); -define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. #endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp index 1144bac9e10..cdbd69807be 100644 --- a/src/hotspot/cpu/riscv/c2_init_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,7 +28,7 @@ #include "opto/compile.hpp" #include "opto/node.hpp" -// processor dependent initialization for riscv64 +// processor dependent initialization for riscv extern void reg_mask_init(); diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp index 0af09b57f31..a29e5be9dbb 100644 --- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp @@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { } void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { - address stub = find_stub(false); + address stub = find_stub(false /* is_aot */); guarantee(stub != NULL, "stub not found"); if (TraceICs) { @@ -118,6 +118,7 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad } void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); // Reset stub. address stub = static_stub->addr(); assert(stub != NULL, "stub not found"); @@ -134,12 +135,10 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ void CompiledDirectStaticCall::verify() { // Verify call. _call->verify(); - if (os::is_MP()) { - _call->verify_alignment(); - } + _call->verify_alignment(); // Verify stub. - address stub = find_stub(false); + address stub = find_stub(false /* is_aot */); assert(stub != NULL, "no stub found for static call"); // Creation also verifies the object. NativeMovConstReg* method_holder diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp index f68298f3db1..05da242e354 100644 --- a/src/hotspot/cpu/riscv/copy_riscv.hpp +++ b/src/hotspot/cpu/riscv/copy_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 
* Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,8 +30,7 @@ // Inline functions for memory copy and fill. // Contains inline asm implementations -#include OS_CPU_HEADER(copy) - +#include OS_CPU_HEADER_INLINE(copy) static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { julong* to = (julong*) tohw; diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp index 18eba342cb7..e9ff307b647 100644 --- a/src/hotspot/cpu/riscv/depChecker_riscv.hpp +++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,9 +24,9 @@ * */ -#ifndef CPU_RISCV64_VM_DEPCHECKER_RISCV64_HPP -#define CPU_RISCV64_VM_DEPCHECKER_RISCV64_HPP +#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP +#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -// Nothing to do on riscv64 +// Nothing to do on riscv -#endif // CPU_RISCV64_VM_DEPCHECKER_RISCV64_HPP +#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp index d8c04b431ca..06bca5298cd 100644 --- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. 
All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -15,7 +15,8 @@ * accompanied this code). * * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any @@ -26,32 +27,12 @@ #ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP #define CPU_RISCV_DISASSEMBLER_RISCV_HPP - static int pd_instruction_alignment() { - return 1; - } - - static const char* pd_cpu_opts() { - return NULL; - } - -// Returns address of n-th instruction preceding addr, -// NULL if no preceding instruction can be found. -// On (riscv64), we assume a constant instruction length. -// It might be beneficial to check "is_readable" as we do on ppc and s390. -static address find_prev_instr(address addr, int n_instr) { - return addr - Assembler::instruction_size * n_instr; +static int pd_instruction_alignment() { + return 1; } -// special-case instruction decoding. -// There may be cases where the binutils disassembler doesn't do -// the perfect job. In those cases, decode_instruction0 may kick in -// and do it right. 
-// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" -static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { - return here; +static const char* pd_cpu_opts() { + return ""; } -// platform-specific instruction annotations (like value of loaded constants) -static void annotate(address pc, outputStream* st) { }; - #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp index d489638a15f..d4fcbdcbbde 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.cpp +++ b/src/hotspot/cpu/riscv/frame_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,7 +29,6 @@ #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" -#include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/oop.inline.hpp" #include "prims/methodHandles.hpp" @@ -56,19 +55,19 @@ void RegisterMap::check_location_valid() { // Profiling/safepoint support bool frame::safe_for_sender(JavaThread *thread) { - address addr_sp = (address)_sp; - address addr_fp = (address)_fp; + address sp = (address)_sp; + address fp = (address)_fp; address unextended_sp = (address)_unextended_sp; // consider stack guards when trying to determine "safe" stack pointers static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
(JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; - assert_cond(thread != NULL); size_t usable_stack_size = thread->stack_size() - stack_guard_size; // sp must be within the usable part of the stack (not in guards) - bool sp_safe = (addr_sp < thread->stack_base()) && - (addr_sp >= thread->stack_base() - usable_stack_size); + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + if (!sp_safe) { return false; @@ -95,8 +94,7 @@ bool frame::safe_for_sender(JavaThread *thread) { // an fp must be within the stack and above (but not equal) sp // second evaluation on fp+ is added to handle situation where fp is -1 - bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && - (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); // We know sp/unextended_sp are safe only fp is questionable here @@ -104,7 +102,7 @@ bool frame::safe_for_sender(JavaThread *thread) { // to construct the sender and do some validation of it. 
This goes a long way // toward eliminating issues when we get in frame construction code - if (_cb != NULL ) { + if (_cb != NULL) { // First check if frame is complete and tester is reliable // Unfortunately we can only check frame complete for runtime stubs and nmethod @@ -139,14 +137,13 @@ bool frame::safe_for_sender(JavaThread *thread) { return false; } - sender_pc = (address) this->fp()[return_addr_offset]; + sender_pc = (address)this->fp()[return_addr_offset]; // for interpreted frames, the value below is the sender "raw" sp, // which can be different from the sender unextended sp (the sp seen // by the sender) because of current frame local variables sender_sp = (intptr_t*) addr_at(sender_sp_offset); sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; saved_fp = (intptr_t*) this->fp()[link_offset]; - } else { // must be some sort of compiled/runtime frame // fp does not have to be safe (although it could be check for c1?) @@ -161,10 +158,10 @@ bool frame::safe_for_sender(JavaThread *thread) { if ((address)sender_sp >= thread->stack_base()) { return false; } + sender_unextended_sp = sender_sp; - sender_pc = (address) *(sender_sp-1); - // Note: frame::sender_sp_offset is only valid for compiled frame - saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + sender_pc = (address) *(sender_sp - 1); + saved_fp = (intptr_t*) *(sender_sp - 2); } @@ -182,16 +179,14 @@ bool frame::safe_for_sender(JavaThread *thread) { } // construct the potential sender - frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); return sender.is_interpreted_frame_valid(thread); - } // We must always be able to find a recognizable pc CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); - if (sender_pc == NULL || sender_blob == NULL) { + if (sender_pc == NULL || sender_blob == NULL) { return false; } @@ -219,7 +214,6 @@ bool frame::safe_for_sender(JavaThread *thread) { } // construct the potential sender - frame 
sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); // Validate the JavaCallWrapper an entry frame must have @@ -240,7 +234,6 @@ bool frame::safe_for_sender(JavaThread *thread) { // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size // because the return address counts against the callee's frame. - if (sender_blob->frame_size() <= 0) { assert(!sender_blob->is_compiled(), "should count return address at least"); return false; @@ -250,7 +243,6 @@ bool frame::safe_for_sender(JavaThread *thread) { // code cache (current frame) is called by an entity within the code cache that entity // should not be anything but the call stub (already covered), the interpreter (already covered) // or an nmethod. - if (!sender_blob->is_compiled()) { return false; } @@ -266,20 +258,17 @@ bool frame::safe_for_sender(JavaThread *thread) { // Must be native-compiled frame. Since sender will try and use fp to find // linkages it must be safe - if (!fp_safe) { return false; } // Will the pc we fetch be non-zero (which we'll find at the oldest frame) - - if ((address) this->fp()[c_frame_return_addr_offset] == NULL) { return false; } + if ((address)this->fp()[return_addr_offset] == NULL) { return false; } return true; } void frame::patch_pc(Thread* thread, address pc) { - assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); address* pc_addr = &(((address*) sp())[-1]); if (TracePcPatching) { tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", @@ -289,6 +278,7 @@ void frame::patch_pc(Thread* thread, address pc) { // patch in the same address that's already there. 
assert(_pc == *pc_addr || pc == *pc_addr, "must be"); *pc_addr = pc; + _cb = CodeCache::find_blob(pc); address original_pc = CompiledMethod::get_deopt_original_pc(this); if (original_pc != NULL) { assert(original_pc == _pc, "expected original PC to be stored before patching"); @@ -395,7 +385,7 @@ void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp //------------------------------------------------------------------------------ // frame::adjust_unextended_sp void frame::adjust_unextended_sp() { - // On riscv64, sites calling method handle intrinsics and lambda forms are treated + // On riscv, sites calling method handle intrinsics and lambda forms are treated // as any other call site. Therefore, no special action is needed when we are // returning to any of these call sites. @@ -464,9 +454,9 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { intptr_t* unextended_sp = l_sender_sp; // the return_address is always the word on the stack - address sender_pc = (address) *(l_sender_sp-1); + address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); - intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset); + intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); assert(map != NULL, "map must be set"); if (map->update_map()) { @@ -489,8 +479,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { } //------------------------------------------------------------------------------ -// frame::sender_raw -frame frame::sender_raw(RegisterMap* map) const { +// frame::sender +frame frame::sender(RegisterMap* map) const { // Default is we done have to follow them. 
The sender_for_xxx will // update it accordingly assert(map != NULL, "map must be set"); @@ -515,10 +505,6 @@ frame frame::sender_raw(RegisterMap* map) const { return frame(sender_sp(), link(), sender_pc()); } -frame frame::sender(RegisterMap* map) const { - return sender_raw(map); -} - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { assert(is_interpreted_frame(), "Not an interpreted frame"); // These are reasonable sanity checks @@ -540,13 +526,12 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { // do some validation of frame elements // first the method - Method* m = *interpreter_frame_method_addr(); - // validate the method we'd find in this potential sender if (!Method::is_valid_method(m)) { return false; } + // stack frames shouldn't be much larger than max_stack elements // this test requires the use of unextended_sp which is the sp as seen by // the current frame, and not sp which is the "raw" pc which could point @@ -557,7 +542,7 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { } // validate bci/bcx - address bcp = interpreter_frame_bcp(); + address bcp = interpreter_frame_bcp(); if (m->validate_bci_from_bcp(bcp) < 0) { return false; } @@ -567,12 +552,22 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { if (MetaspaceObj::is_valid(cp) == false) { return false; } + // validate locals - address locals = (address) *interpreter_frame_locals_addr(); + address locals = (address) *interpreter_frame_locals_addr(); + if (locals > thread->stack_base()) { + return false; + } - if (locals > thread->stack_base() || locals < (address) fp()) { + if (m->max_locals() > 0 && locals < (address) fp()) { + // fp in interpreter frame on RISC-V is higher than that on AArch64, + // pointing to sender_sp and sender_sp-2 relatively. + // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, + // pointing to sender_sp-1 (with one padding slot). 
+ // So we verify the 'locals' pointer only if max_locals > 0. return false; } + // We'd have to be pretty unlucky to be mislead at this point return true; } @@ -652,7 +647,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) { #endif intptr_t *frame::initial_deoptimization_info() { - // Not used on riscv64, but we must return something. + // Not used on riscv, but we must return something. return NULL; } diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp index 7829a8b9f02..18e021dcb94 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -56,10 +55,10 @@ // [last esp ] = last_sp() last_sp_offset // [old stack pointer ] (sender_sp) sender_sp_offset -// [old frame pointer ] <- fp = link() +// [old frame pointer ] // [return pc ] -// [last sp ] +// [last sp ] <- fp = link() // [oop temp ] (only for native calls) // [padding ] (to preserve machine SP alignment) @@ -107,18 +106,14 @@ public: enum { pc_return_offset = 0, - // C frames - c_frame_link_offset = -2, - c_frame_return_addr_offset = -1, - c_frame_sender_sp_offset = 0, - // Java frames - link_offset = 0, - return_addr_offset = 1, - sender_sp_offset = 2, + // All frames + link_offset = -2, + return_addr_offset = -1, + sender_sp_offset = 0, // Interpreter frames - interpreter_frame_oop_temp_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 1, // for native calls only - interpreter_frame_sender_sp_offset = -1, + interpreter_frame_sender_sp_offset = -3, // outgoing sp before a call to an invoked method interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, @@ -136,8 +131,8 @@ // Entry frames // n.b. 
these values are determined by the layout defined in // stubGenerator for the Java call stub - entry_frame_after_call_words = 32, - entry_frame_call_wrapper_offset = -8, + entry_frame_after_call_words = 34, + entry_frame_call_wrapper_offset = -10, // we don't need a save area arg_reg_save_area_bytes = 0 @@ -190,12 +185,6 @@ inline address* sender_pc_addr() const; - // C frame methods - inline intptr_t* c_frame_link() const; - inline address* c_frame_sender_pc_addr() const; - inline address c_frame_sender_pc() const; - inline intptr_t* c_frame_sender_sp() const; - // expression stack tos if we are nested in a java call intptr_t* interpreter_frame_last_sp() const; @@ -207,7 +196,4 @@ static jint interpreter_frame_expression_stack_direction() { return -1; } - // returns the sending frame, without applying any barriers - frame sender_raw(RegisterMap* map) const; - #endif // CPU_RISCV_FRAME_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp index c9449c3254d..abd5bda7e49 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -30,7 +30,7 @@ #include "code/codeCache.hpp" #include "code/vmreg.inline.hpp" -// Inline functions for RISCV64 frames: +// Inline functions for RISCV frames: // Constructors: @@ -143,6 +143,11 @@ inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline intptr_t* frame::link_or_null() const { + intptr_t** ptr = (intptr_t **)addr_at(link_offset); + return os::is_readable_pointer(ptr) ? *ptr : NULL; +} + inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } // Return address @@ -150,12 +155,6 @@ inline address* frame::sender_pc_addr() const { return (address*) addr_at(re inline address frame::sender_pc() const { return *sender_pc_addr(); } inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } -// C frame methods -inline intptr_t* frame::c_frame_link() const { return (intptr_t*) *(intptr_t **)addr_at(c_frame_link_offset); } -inline address* frame::c_frame_sender_pc_addr() const { return (address*) addr_at(c_frame_return_addr_offset); } -inline address frame::c_frame_sender_pc() const { return *c_frame_sender_pc_addr(); } -inline intptr_t* frame::c_frame_sender_sp() const { return addr_at(c_frame_sender_sp_offset); } - inline intptr_t** frame::interpreter_frame_locals_addr() const { return (intptr_t**)addr_at(interpreter_frame_locals_offset); } @@ -233,21 +232,14 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { // Compiled frames inline oop frame::saved_oop_result(RegisterMap* map) const { oop* result_adr = (oop *)map->location(x10->as_VMReg()); - if(result_adr != NULL) { - return (*result_adr); - } else { - ShouldNotReachHere(); - return NULL; - } + guarantee(result_adr != NULL, "bad register save location"); + return (*result_adr); } inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) 
{ oop* result_adr = (oop *)map->location(x10->as_VMReg()); - if(result_adr != NULL) { - *result_adr = obj; - } else { - ShouldNotReachHere(); - } + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; } #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp index 2a95e72c937..e191cbcee2a 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, __ j(done); __ bind(runtime); - // save the live input values - RegSet saved = RegSet::of(pre_val); - if (tosca_live) { saved += RegSet::of(x10); } - if (obj != noreg) { saved += RegSet::of(obj); } - - __ push_reg(saved, sp); + __ push_call_clobbered_registers(); if (expand_call) { assert(pre_val != c_rarg1, "smashed arg"); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); } else { __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); } - - __ pop_reg(saved, sp); + __ pop_call_clobbered_registers(); __ bind(done); @@ -196,6 +190,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, BarrierSet* bs = BarrierSet::barrier_set(); CardTableBarrierSet* ctbs = barrier_set_cast(bs); CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); Label done; Label runtime; @@ -213,6 +208,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, // storing region crossing non-NULL, is card already dirty? 
ExternalAddress cardtable((address) ct->byte_map_base()); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); const Register card_addr = tmp; __ srli(card_addr, store_addr, CardTable::card_shift); @@ -265,7 +261,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator bool on_reference = on_weak || on_phantom; ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); if (on_oop && on_reference) { - // LR is live. It must be saved around calls. + // RA is live. It must be saved around calls. __ enter(); // barrier may call runtime // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. @@ -338,8 +334,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), - false /* wide */, false /* unaligned */); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); } __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -420,6 +415,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* BarrierSet* bs = BarrierSet::barrier_set(); CardTableBarrierSet* ctbs = barrier_set_cast(bs); CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); Label done; Label runtime; @@ -432,8 +428,8 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); const Register card_offset = t1; - // LR is free here, so we can use it to hold the byte_map_base. 
- const Register byte_map_base = lr; + // RA is free here, so we can use it to hold the byte_map_base. + const Register byte_map_base = ra; assert_different_registers(card_offset, byte_map_base, t0); @@ -464,8 +460,8 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* __ sub(t0, t0, wordSize); __ sd(t0, queue_index); - // Reuse LR to hold buffer_addr - const Register buffer_addr = lr; + // Reuse RA to hold buffer_addr + const Register buffer_addr = ra; __ ld(buffer_addr, buffer); __ add(t0, buffer_addr, t0); diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp index 33a3856f882..37bc183f39c 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -23,8 +23,8 @@ * */ -#ifndef CPU_RISCV64_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/modRefBarrierSetAssembler.hpp" @@ -75,4 +75,4 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { Register dst, Address src, Register tmp1, Register tmp_thread); }; -#endif // CPU_RISCV64_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp index 3568a54fac6..8735fd014ff 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp @@ -20,6 +20,7 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
+ * */ #ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index f6721636d84..2b556b95d71 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +27,6 @@ #include "classfile/classLoaderData.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" -#include "gc/shared/barrierSetNMethod.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/interp_masm.hpp" #include "memory/universe.hpp" @@ -42,7 +41,7 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, Register dst, Address src, Register tmp1, Register tmp_thread) { assert_cond(masm != NULL); - // LR is live. It must be saved around calls. + // RA is live. It must be saved around calls. 
bool in_heap = (decorators & IN_HEAP) != 0; bool in_native = (decorators & IN_NATIVE) != 0; @@ -176,16 +175,24 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, } else { Register end = tmp1; Label retry; - int32_t offset = 0; __ bind(retry); - Register tmp = t0; + // Get the current end of the heap + ExternalAddress address_end((address) Universe::heap()->end_addr()); + { + int32_t offset; + __ la_patchable(t1, address_end, offset); + __ ld(t1, Address(t1, offset)); + } // Get the current top of the heap ExternalAddress address_top((address) Universe::heap()->top_addr()); - __ la_patchable(tmp, address_top, offset); - __ addi(tmp, tmp, offset); - __ lr_d(obj, tmp, Assembler::aqrl); + { + int32_t offset; + __ la_patchable(t0, address_top, offset); + __ addi(t0, t0, offset); + __ lr_d(obj, t0, Assembler::aqrl); + } // Adjust it my the size of our new object if (var_size_in_bytes == noreg) { @@ -197,18 +204,12 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, // if end < obj then we wrapped around high memory __ bltu(end, obj, slow_case, is_far); - Register heap_end = t1; - // Get the current end of the heap - ExternalAddress address_end((address) Universe::heap()->end_addr()); - offset = 0; - __ la_patchable(heap_end, address_end, offset); - __ ld(heap_end, Address(heap_end, offset)); - - __ bgtu(end, heap_end, slow_case, is_far); + __ bgtu(end, t1, slow_case, is_far); // If heap_top hasn't been changed by some other thread, update it. 
- __ sc_d(t1, end, tmp, Assembler::rl); + __ sc_d(t1, end, t0, Assembler::rl); __ bnez(t1, retry); + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); } } diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp index d0ab14d2aff..984d94f4c3d 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp @@ -23,12 +23,11 @@ * */ -#ifndef CPU_RISCV64_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSet.hpp" -#include "gc/shared/barrierSetNMethod.hpp" #include "memory/allocation.hpp" #include "oops/access.hpp" @@ -74,4 +73,4 @@ class BarrierSetAssembler: public CHeapObj { virtual ~BarrierSetAssembler() {} }; -#endif // CPU_RISCV64_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp index ef51661e0db..81d47d61d4c 100644 --- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -41,6 +41,10 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob BarrierSet* bs = BarrierSet::barrier_set(); assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + __ srli(obj, obj, CardTable::card_shift); assert(CardTable::dirty_card_val() == 0, "must be"); @@ -56,6 +60,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob __ sb(zr, Address(tmp)); __ bind(L_already_dirty); } else { + if (ct->scanned_concurrently()) { + __ membar(MacroAssembler::StoreStore); + } __ sb(zr, Address(tmp)); } } @@ -66,12 +73,16 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl assert_different_registers(start, tmp); assert_different_registers(count, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + Label L_loop, L_done; const Register end = count; __ beqz(count, L_done); // zero count - nothing to do - __ slli(count, count, LogBytesPerHeapOop); - __ add(end, start, count); // end = start + count << LogBytesPerHeapOop + // end = start + count << LogBytesPerHeapOop + __ shadd(end, count, start, count, LogBytesPerHeapOop); __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive __ srli(start, start, CardTable::card_shift); @@ -80,6 +91,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl __ load_byte_map_base(tmp); __ add(start, start, tmp); + if (ct->scanned_concurrently()) { + __ membar(MacroAssembler::StoreStore); + } __ bind(L_loop); __ add(tmp, start, count); diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp 
b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp index a9e8cf09fcb..686fe8fa478 100644 --- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp @@ -23,8 +23,8 @@ * */ -#ifndef CPU_RISCV64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/modRefBarrierSetAssembler.hpp" @@ -37,7 +37,6 @@ class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { Register start, Register count, Register tmp, RegSet saved_regs); virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2); - }; -#endif // #ifndef CPU_RISCV64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp index 52c1e011088..00419c3163c 100644 --- a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -23,8 +23,8 @@ * */ -#ifndef CPU_RISCV64_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" @@ -52,4 +52,4 @@ class ModRefBarrierSetAssembler: public BarrierSetAssembler { Address dst, Register val, Register tmp1, Register tmp2); }; -#endif // 
CPU_RISCV64_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp index b2edba5f558..d19f5b859ce 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" +#include "gc/shared/gc_globals.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" @@ -50,15 +51,7 @@ void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { } ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, - /* release */ Assembler::rl, /* is_cae */ false, result); - if (UseBarriersForVolatile) { - // The membar here is necessary to prevent reordering between the - // release store in the CAS above and a subsequent volatile load. - // However for !UseBarriersForVolatile, C1 inserts a full barrier before - // volatile loads which means we don't need an additional barrier - // here (see LIRGenerator::volatile_field_load()). 
- __ membar(MacroAssembler::AnyAny); - } + /* release */ Assembler::rl, /* is_cae */ false, result); } #undef __ diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp index e75e2d1bfdf..b8534c52e77 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -50,8 +50,8 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec Register src, Register dst, Register count, RegSet saved_regs) { if (is_oop) { bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; - if ((ShenandoahSATBBarrier && !dest_uninitialized) || - ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + Label done; // Avoid calling runtime if count == 0 @@ -118,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { __ lwu(tmp, in_progress); } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); __ lbu(tmp, in_progress); } __ beqz(tmp, done); @@ -201,7 +201,7 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb // - Test lowest two bits == 0 // - If so, set the lowest two bits // - Invert the result back, and copy to dst - RegSet savedRegs = RegSet::of(t2); + RegSet saved_regs = RegSet::of(t2); bool borrow_reg = (tmp == noreg); if (borrow_reg) { // No free registers available. Make one useful. 
@@ -209,11 +209,11 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb if (tmp == dst) { tmp = t1; } - savedRegs += RegSet::of(tmp); + saved_regs += RegSet::of(tmp); } assert_different_registers(tmp, dst, t2); - __ push_reg(savedRegs, sp); + __ push_reg(saved_regs, sp); Label done; __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); @@ -224,11 +224,12 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 __ bind(done); - __ pop_reg(savedRegs, sp); + __ pop_reg(saved_regs, sp); } void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, - Register dst, Address load_addr) { + Register dst, + Address load_addr) { assert(ShenandoahLoadRefBarrier, "Should be enabled"); assert(dst != t1 && load_addr.base() != t1, "need t1"); assert_different_registers(load_addr.base(), t0, t1); @@ -250,15 +251,15 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl } // Save x10 and x11, unless it is an output register - RegSet to_save = RegSet::of(x10, x11) - result_dst; - __ push_reg(to_save, sp); + RegSet saved_regs = RegSet::of(x10, x11) - result_dst; + __ push_reg(saved_regs, sp); __ la(x11, load_addr); __ mv(x10, dst); __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); __ mv(result_dst, x10); - __ pop_reg(to_save, sp); + __ pop_reg(saved_regs, sp); __ bind(done); __ leave(); @@ -267,7 +268,9 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { if (ShenandoahIUBarrier) { __ push_call_clobbered_registers(); + satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + __ pop_call_clobbered_registers(); } } @@ -311,16 +314,14 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, // 2: 
load a reference from src location and apply LRB if needed if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { - guarantee(dst != x30 && src.base() != x30, "load_at need x30"); - bool ist5 = (dst == src.base()); - if (ist5) { - __ push_reg(RegSet::of(x30), sp); - } Register result_dst = dst; // Preserve src location for LRB + RegSet saved_regs; if (dst == src.base()) { - dst = x30; + dst = (src.base() == x28) ? x29 : x28; + saved_regs = RegSet::of(dst); + __ push_reg(saved_regs, sp); } assert_different_registers(dst, src.base()); @@ -333,8 +334,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, dst = result_dst; } - if (ist5) { - __ pop_reg(RegSet::of(x30), sp); + if (saved_regs.bits() != 0) { + __ pop_reg(saved_regs, sp); } } else { BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); @@ -432,39 +433,10 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler // from-space, or it refers to the to-space version of an object that // is being evacuated out of from-space. // -// By default, this operation implements sequential consistency and the -// value held in the result register following execution of the -// generated code sequence is 0 to indicate failure of CAS, non-zero -// to indicate success. Arguments support variations on this theme: -// -// acquire: Allow relaxation of the memory ordering on CAS from -// sequential consistency. This can be useful when -// sequential consistency is not required, such as when -// another sequentially consistent operation is already -// present in the execution stream. If acquire, successful -// execution has the side effect of assuring that memory -// values updated by other threads and "released" will be -// visible to any read operations perfomed by this thread -// which follow this operation in program order. This is a -// special optimization that should not be enabled by default. 
-// release: Allow relaxation of the memory ordering on CAS from -// sequential consistency. This can be useful when -// sequential consistency is not required, such as when -// another sequentially consistent operation is already -// present in the execution stream. If release, successful -// completion of this operation has the side effect of -// assuring that all writes to memory performed by this -// thread that precede this operation in program order are -// visible to all other threads that subsequently "acquire" -// before reading the respective memory values. This is a -// special optimization that should not be enabled by default. -// is_cae: This turns CAS (compare and swap) into CAE (compare and -// exchange). This HotSpot convention is that CAE makes -// available to the caller the "failure witness", which is -// the value that was stored in memory which did not match -// the expected value. If is_cae, the result is the value -// most recently fetched from addr rather than a boolean -// success indicator. +// By default the value held in the result register following execution +// of the generated code sequence is 0 to indicate failure of CAS, +// non-zero to indicate success. If is_cae, the result is the value most +// recently fetched from addr rather than a boolean success indicator. 
// // Clobbers t0, t1 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, @@ -547,8 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), - stub->info(), false /* wide */, false /* unaligned */); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); } __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -660,12 +631,13 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s __ push_call_clobbered_registers(); __ load_parameter(0, x10); __ load_parameter(1, x11); + if (UseCompressedOops) { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); } else { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); } - __ jalr(lr); + __ jalr(ra); __ mv(t0, x10); __ pop_call_clobbered_registers(); __ mv(x10, t0); @@ -714,11 +686,11 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator __ push_call_clobbered_registers(); if (UseCompressedOops) { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); } else { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); } - __ jalr(lr); + __ jalr(ra); __ mv(t0, x10); __ pop_call_clobbered_registers(); __ mv(x10, t0); diff --git 
a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp index 9bd95227fd1..5d75035e9d4 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp @@ -28,6 +28,7 @@ #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" #ifdef COMPILER1 class LIR_Assembler; class ShenandoahPreBarrierStub; @@ -37,31 +38,6 @@ class StubAssembler; class StubCodeGenerator; class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -public: - static address shenandoah_lrb(); - - void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); - -#ifdef COMPILER1 - void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); - void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); - void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); - void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -#endif - - virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, - Register src, Register dst, Register count, RegSet saved_regs); - virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Register dst, Address src, Register tmp1, Register tmp_thread); - virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2); - virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, - Register obj, Register tmp, Label& slowpath); - virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, - Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); - - virtual void barrier_stubs_init(); - 
private: static address _shenandoah_lrb; @@ -87,6 +63,35 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); address generate_shenandoah_lrb(StubCodeGenerator* cgen); + +public: + + static address shenandoah_lrb(); + + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); +#endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); + + virtual void barrier_stubs_init(); }; #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad index 36855c3f9b8..bab407a8b76 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad @@ -71,7 +71,7 @@ instruct 
compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i %} instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ins_cost(10 * DEFAULT_COST); @@ -93,7 +93,7 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval %} instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ins_cost(10 * DEFAULT_COST); @@ -118,9 +118,11 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ins_cost(10 * DEFAULT_COST); effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -128,6 +130,7 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, true /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} @@ -139,6 +142,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva format %{ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
@@ -146,6 +150,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, true /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} @@ -158,6 +163,7 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -166,6 +172,7 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, false /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} @@ -177,6 +184,7 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva format %{ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -184,5 +192,6 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, false /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp index 20e5a20ee42..d7f261af3ff 100644 --- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,12 +33,8 @@ const int StackAlignmentInBytes = 16; // 32-bit integer argument values are extended to 64 bits. const bool CCallingConventionRequiresIntsAsLongs = false; -// RISC-V has adopted a multicopy atomic model closely following -// that of ARMv8. -#define CPU_MULTI_COPY_ATOMIC - -// We treat concurrent modification and execution of instructions -// conservatively on RISC-V, just like it was done in aarch64 port. +// To be safe, we deoptimize when we come across an access that needs +// patching. This is similar to what is done on aarch64. #define DEOPTIMIZE_WHEN_PATCHING #define SUPPORTS_NATIVE_CX8 @@ -47,6 +43,10 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define THREAD_LOCAL_POLL -#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS true +#if INCLUDE_JVMCI +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS (EnableJVMCI || UseAOT) +#else +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false +#endif #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index f0743a556b0..2ddb9e62feb 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -40,7 +39,7 @@ define_pd_global(bool, TrapBasedNullChecks, false); define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. -define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, CodeEntryAlignment, 64); define_pd_global(intx, OptoLoopAlignment, 16); define_pd_global(intx, InlineFrequencyCount, 100); @@ -83,34 +82,31 @@ define_pd_global(bool, ThreadLocalHandshakes, true); define_pd_global(intx, InlineSmallCode, 1000); -#define ARCH_FLAGS(develop, \ - product, \ - diagnostic, \ - experimental, \ - notproduct, \ - range, \ - constraint, \ - writeable) \ - \ - product(bool, NearCpool, true, \ - "constant pool is close to instructions") \ - product(bool, UseBarriersForVolatile, false, \ - "Use memory barriers to implement volatile accesses") \ - product(bool, UseCRC32, false, \ - "Use CRC32 instructions for CRC32 computation") \ - product(bool, UseBlockZeroing, true, \ - "Use DC ZVA for block zeroing") \ - product(intx, BlockZeroingLowLimit, 256, \ - "Minimum size in bytes when block zeroing will be used") \ - range(1, max_jint) \ - product(bool, TraceTraps, false, "Trace all traps the signal handler")\ - product(bool, UseConservativeFence, true, \ - "Extend i for r and o for w in the pred/succ flags of fence;" \ - "Extend fence.i to fence.i + fence.") \ - product(bool, AvoidUnalignedAccesses, true, \ - "Avoid generating unaligned memory accesses") \ - product(bool, UseRVV, false, "Use RVV instructions") \ - product(bool, UseRVV071, false, "Use RVV 0.7.1 instructions") \ - product(bool, UseCSky, false, "Use CSky specific instructions") \ +#define ARCH_FLAGS(develop, \ + product, \ + diagnostic, \ + experimental, \ + notproduct, \ + range, \ + constraint, \ + writeable) \ + \ + product(bool, NearCpool, true, \ + "constant 
pool is close to instructions") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") \ + /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ + product(bool, UseConservativeFence, true, \ + "Extend i for r and o for w in the pred/succ flags of fence;" \ + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ + experimental(bool, UseRVV, false, "Use RVV instructions") \ + experimental(bool, UseRVV071, false, "Use RVV 0.7.1 instructions") \ + experimental(bool, UseRVB, false, "Use RVB instructions") \ + experimental(bool, UseRVC, false, "Use RVC instructions") \ + product(bool, UseCSky, false, "Use CSky specific instructions") \ #endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp index 908f610cd75..cc93103dc55 100644 --- a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ int InlineCacheBuffer::ic_stub_code_size() { // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) - // 5: auipc + ld + j + address(2 * instruction_size ) + // 5: auipc + ld + j + address(2 * instruction_size) return (MacroAssembler::far_branches() ? 
6 : 5) * NativeInstruction::instruction_size; } @@ -47,7 +47,7 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached ResourceMark rm; CodeBuffer code(code_begin, ic_stub_code_size()); MacroAssembler* masm = new MacroAssembler(&code); - // note: even though the code contains an embedded value, we do not need reloc info + // Note: even though the code contains an embedded value, we do not need reloc info // because // (1) the value is old (i.e., doesn't matter for scavenges) // (2) these ICStubs are removed *before* a GC happens, so the roots disappear diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp index 7faa0d9d32b..922a80f9f3e 100644 --- a/src/hotspot/cpu/riscv/icache_riscv.cpp +++ b/src/hotspot/cpu/riscv/icache_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,9 +35,7 @@ static int icache_flush(address addr, int lines, int magic) { } void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { - address start = (address)icache_flush; - *flush_icache_stub = (ICache::flush_icache_stub_t)start; // ICache::invalidate_range() contains explicit condition that the first diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp index ebdc4e56a4c..5bf40ca8204 100644 --- a/src/hotspot/cpu/riscv/icache_riscv.hpp +++ b/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -26,14 +26,14 @@ #ifndef CPU_RISCV_ICACHE_RISCV_HPP #define CPU_RISCV_ICACHE_RISCV_HPP -// Interface for updating the instruction cache. Whenever the VM +// Interface for updating the instruction cache. 
Whenever the VM // modifies code, part of the processor instruction cache potentially // has to be flushed. class ICache : public AbstractICache { public: enum { - stub_size = 16, // Size of the icache flush stub in bytes + stub_size = 16, // Size of the icache flush stub in bytes line_size = BytesPerWord, // conservative log2_line_size = LogBytesPerWord // log2(line_size) }; diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index d227397cafb..b50be7e726c 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "oops/arrayOop.hpp" -#include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" #include "prims/jvmtiExport.hpp" @@ -67,17 +66,17 @@ void InterpreterMacroAssembler::narrow(Register result) { bind(notBool); mv(t1, T_BYTE); bne(t0, t1, notByte); - sign_ext(result, result, registerSize - 8); + sign_extend(result, result, 8); j(done); bind(notByte); mv(t1, T_CHAR); bne(t0, t1, notChar); - zero_ext(result, result, registerSize - 16); // turncate upper 48 bits + zero_extend(result, result, 16); j(done); bind(notChar); - sign_ext(result, result, registerSize - 16); // sign-extend short + sign_extend(result, result, 16); // Nothing to do for T_INT bind(done); @@ -178,7 +177,7 @@ void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); lhu(reg, Address(xbcp, bcp_offset)); - grev16(reg, reg); + revb_h(reg, reg); } void InterpreterMacroAssembler::get_dispatch() { @@ -223,13 +222,12 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, assert_different_registers(cache, xcpool); get_cache_index_at_bcp(index, bcp_offset, index_size); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); - // convert from field index to ConstantPoolCacheEntry - // riscv64 already has the cache in xcpool so there is no need to - // install it in cache. instead we pre-add the indexed offset to + // Convert from field index to ConstantPoolCacheEntry + // riscv already has the cache in xcpool so there is no need to + // install it in cache. Instead we pre-add the indexed offset to // xcpool and return it in cache. 
All clients of this method need to // be modified accordingly. - slli(cache, index, 5); - add(cache, xcpool, cache); + shadd(cache, index, xcpool, cache, 5); } @@ -250,8 +248,8 @@ void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register lwu(bytecode, bytecode); membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); const int shift_count = (1 + byte_no) * BitsPerByte; - slli(bytecode, bytecode, registerSize - (shift_count + BitsPerByte)); - srli(bytecode, bytecode, registerSize - BitsPerByte); + slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); + srli(bytecode, bytecode, XLEN - BitsPerByte); } void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, @@ -261,14 +259,15 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, assert(cache != tmp, "must use different register"); get_cache_index_at_bcp(tmp, bcp_offset, index_size); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); - // convert from field index to ConstantPoolCacheEntry index + // Convert from field index to ConstantPoolCacheEntry index // and from word offset to byte offset - assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, + "else change next line"); ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); // skip past the header add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); - slli(tmp, tmp, 2 + LogBytesPerWord); - add(cache, cache, tmp); // construct pointer to cache entry + // construct pointer to cache entry + shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); } // Load object from cpool->resolved_references(index) @@ -277,25 +276,22 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index( assert_different_registers(result, index); get_constant_pool(result); - // load pointer 
for resolved_references[] objArray + // Load pointer for resolved_references[] objArray ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); resolve_oop_handle(result, tmp); // Add in the index addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - slli(index, index, LogBytesPerHeapOop); - add(result, result, index); + shadd(result, index, result, index, LogBytesPerHeapOop); load_heap_oop(result, Address(result, 0)); } void InterpreterMacroAssembler::load_resolved_klass_at_offset( Register cpool, Register index, Register klass, Register temp) { - slli(temp, index, LogBytesPerWord); - add(temp, temp, cpool); + shadd(temp, index, cpool, temp, LogBytesPerWord); lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses - slli(temp, temp, LogBytesPerWord); - add(klass, klass, temp); + shadd(klass, temp, klass, temp, LogBytesPerWord); ld(klass, Address(klass, Array::base_offset_in_bytes())); } @@ -507,23 +503,21 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, Label safepoint; address* const safepoint_table = Interpreter::safept_table(state); bool needs_thread_local_poll = generate_poll && - SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; if (needs_thread_local_poll) { NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); + andi(t1, t1, SafepointMechanism::poll_bit()); bnez(t1, safepoint); } if (table == Interpreter::dispatch_table(state)) { li(t1, Interpreter::distance_from_dispatch_table(state)); add(t1, Rs, t1); - slli(t1, t1, 3); - add(t1, xdispatch, t1); + 
shadd(t1, t1, xdispatch, t1, 3); } else { mv(t1, (address)table); - slli(Rs, Rs, 3); - add(t1, t1, Rs); + shadd(t1, Rs, t1, Rs, 3); } ld(t1, Address(t1)); jr(t1); @@ -531,8 +525,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, if (needs_thread_local_poll) { bind(safepoint); la(t1, ExternalAddress((address)safepoint_table)); - slli(Rs, Rs, 3); - add(t1, t1, Rs); + shadd(t1, Rs, t1, Rs, 3); ld(t1, Address(t1)); jr(t1); } @@ -581,7 +574,7 @@ void InterpreterMacroAssembler::remove_activation( bool throw_monitor_exception, bool install_monitor_exception, bool notify_jvmdi) { - // Note: Registers x13 xmm0 may be in use for the + // Note: Registers x13 may be in use for the // result check if synchronized method Label unlocked, unlock, no_unlock; @@ -773,7 +766,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) Label slow_case; - // Load object pointer into obj_reg %c_rarg3 + // Load object pointer into obj_reg c_rarg3 ld(obj_reg, Address(lock_reg, obj_offset)); if (UseBiasedLocking) { @@ -791,7 +784,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) "displached header must be first word in BasicObjectLock"); if (PrintBiasedLockingStatistics) { - Label fast, fail; + Label fail, fast; cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); bind(fast); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), @@ -804,13 +797,13 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) // Test if the oopMark is an obvious stack pointer, i.e., // 1) (mark & 7) == 0, and - // 2) rsp <= mark < mark + os::pagesize() + // 2) sp <= mark < mark + os::pagesize() // // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (7 - os::vm_page_size())), + // expression: ((mark - sp) & (7 - os::vm_page_size())), // assuming both stack pointer and pagesize have their // least significant 3 bits clear. 
- // NOTE: the oopMark is in swap_reg %x10 as the result of cmpxchg + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg sub(swap_reg, swap_reg, sp); li(t0, (int64_t)(7 - os::vm_page_size())); andr(swap_reg, swap_reg, t0); @@ -853,7 +846,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); if (UseHeavyMonitors) { - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); } else { Label done; @@ -864,10 +859,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) save_bcp(); // Save in case of exception // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %x10 + // structure Store the BasicLock address into x10 la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); - // Load oop into obj_reg(%c_rarg3) + // Load oop into obj_reg(c_rarg3) ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // Free entry @@ -889,7 +884,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Call the runtime routine for slow case. sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); bind(done); @@ -1473,7 +1470,7 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, if (ProfileInterpreter) { Label profile_continue; - // if no method data exists, go to profile_continue. + // If no method data exists, go to profile_continue. 
test_method_data_pointer(mdp, profile_continue); // Build the base (index * per_case_size_in_bytes()) + @@ -1651,8 +1648,8 @@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md xorr(obj, obj, t0); andi(t0, obj, TypeEntries::type_klass_mask); beqz(t0, next); // klass seen before, nothing to - // do. The unknown bit may have been - // set already but no need to check. + // do. The unknown bit may have been + // set already but no need to check. andi(t0, obj, TypeEntries::type_unknown); bnez(t0, next); @@ -1793,8 +1790,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); - slli(tmp, tmp, exact_log2(DataLayout::cell_size)); - add(mdp, mdp, tmp); + shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); } sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); } else { @@ -1833,7 +1829,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, beq(t0, tmp, do_profile); get_method(tmp); lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); - li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); + li(t1, vmIntrinsics::_compiledLambdaForm); bne(t0, t1, profile_continue); bind(do_profile); } @@ -1876,22 +1872,17 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t add(t0, mdp, off_base); add(t1, mdp, type_base); - - slli(tmp2, tmp1, per_arg_scale); - add(tmp2, tmp2, t0); + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); // load offset on the stack from the slot for this parameter ld(tmp2, Address(tmp2, 0)); neg(tmp2, tmp2); // read the parameter from the local area - - slli(tmp2, tmp2, Interpreter::logStackElementSize); - add(tmp2, tmp2, xlocals); + shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); 
ld(tmp2, Address(tmp2, 0)); // profile the parameter - slli(t0, tmp1, per_arg_scale); - add(t1, t0, t1); + shadd(t1, tmp1, t1, t0, per_arg_scale); Address arg_type(t1, 0); profile_obj_type(tmp2, arg_type, tmp3); diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp index 21bb67efbb6..4126e8ee70f 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // Load cpool->resolved_klass_at(index). void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); - void load_resolved_method_at_index(int byte_no, Register method, Register cache); - void pop_ptr(Register r = x10); void pop_i(Register r = x10); void pop_l(Register r = x10); @@ -148,7 +146,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void load_ptr(int n, Register val); void store_ptr(int n, Register val); -// Load float value from 'address'. The value is loaded onto the FPU register v0. + // Load float value from 'address'. The value is loaded onto the FPU register v0. void load_float(Address src); void load_double(Address src); diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp index 4ef603451c0..776b0787238 100644 --- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -45,94 +45,99 @@ Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } +Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + return g_INTArgReg[++_num_reg_int_args]; + } + return noreg; +} + +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; + } else { + return fnoreg; + } +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { + int ret = _stack_offset; + _stack_offset += wordSize; + return ret; +} + InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { _masm = new MacroAssembler(buffer); // allocate on resourse area by default - _num_int_args = (method->is_static() ? 1 : 0); - _num_fp_args = 0; + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; _stack_offset = 0; } void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { const Address src(from(), Interpreter::local_offset_in_bytes(offset())); - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - __ lw(g_INTArgReg[++_num_int_args], src); + Register reg = next_gpr(); + if (reg != noreg) { + __ lw(reg, src); } else { __ lw(x10, src); - __ sw(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_int_args++; + __ sw(x10, Address(to(), next_stack_offset())); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - __ ld(g_INTArgReg[++_num_int_args], src); - } else { + Register reg = next_gpr(); + if (reg != noreg) { + __ ld(reg, src); + } else { __ ld(x10, src); - __ sd(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_int_args++; + __ sd(x10, Address(to(), next_stack_offset())); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { const Address src(from(), Interpreter::local_offset_in_bytes(offset())); - if (_num_fp_args < Argument::n_float_register_parameters_c) { - // to c_farg - __ flw(g_FPArgReg[_num_fp_args++], src); - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // to c_rarg - __ lwu(g_INTArgReg[++_num_int_args], src); + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ flw(reg, src); } else { - // to stack - __ lwu(x10, src); - __ sw(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_fp_args++; + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + pass_int(); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); - if (_num_fp_args < 
Argument::n_float_register_parameters_c) { - // to c_farg - __ fld(g_FPArgReg[_num_fp_args++], src); - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // to c_rarg - __ ld(g_INTArgReg[++_num_int_args], src); + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ fld(reg, src); } else { - // to stack - __ ld(x10, src); - __ sd(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_fp_args++; + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + pass_long(); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { - - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // to reg - if (_num_int_args == 0) { - assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); - __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); - _num_int_args++; - } else { + Register reg = next_gpr(); + if (reg == c_rarg1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); + } else if (reg != noreg) { // c_rarg2-c_rarg7 __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); - __ mv(g_INTArgReg[++_num_int_args], 0); //_num_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... + __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
__ ld(temp(), x10); Label L; __ beqz(temp(), L); - __ mv(g_INTArgReg[_num_int_args], x10); + __ mv(reg, x10); __ bind(L); - } } else { //to stack __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); @@ -141,9 +146,8 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { __ bnez(temp(), L); __ mv(x10, zr); __ bind(L); - __ sd(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_int_args++; + assert(sizeof(jobject) == wordSize, ""); + __ sd(x10, Address(to(), next_stack_offset())); } } @@ -172,84 +176,79 @@ class SlowSignatureHandler intptr_t* _int_args; intptr_t* _fp_args; intptr_t* _fp_identifiers; - unsigned int _num_int_args; - unsigned int _num_fp_args; + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; - virtual void pass_int() - { - jint from_obj = *(jint *)(_from + Interpreter::local_offset_in_bytes(0)); + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); _from -= Interpreter::stackElementSize; + return from_addr; + } - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_int_args++; + intptr_t* double_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + return from_addr; + } + + int pass_gpr(intptr_t value) { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + *_int_args++ = value; + return _num_reg_int_args++; } + return -1; } - virtual void pass_long() - { - intptr_t from_obj = *(intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); - _from -= 2*Interpreter::stackElementSize; - - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_int_args++; + int pass_fpr(intptr_t value) { + if (_num_reg_fp_args < 
Argument::n_float_register_parameters_c) { + *_fp_args++ = value; + return _num_reg_fp_args++; } + return -1; } - virtual void pass_object() - { - intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; + void pass_stack(intptr_t value) { + *_to++ = value; + } - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t)from_addr; - _num_int_args++; - } else { - *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; - _num_int_args++; + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); } } - virtual void pass_float() - { - jint from_obj = *(jint*)(_from + Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; + virtual void pass_long() { + intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } - if (_num_fp_args < Argument::n_float_register_parameters_c) { - *_fp_args++ = from_obj; - _num_fp_args++; - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_fp_args++; + virtual void pass_object() { + intptr_t* addr = single_slot_addr(); + intptr_t value = *addr == 0 ? 
NULL : (intptr_t)addr; + if (pass_gpr(value) < 0) { + pass_stack(value); } } - virtual void pass_double() - { - intptr_t from_obj = *(intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); - _from -= 2*Interpreter::stackElementSize; - - if (_num_fp_args < Argument::n_float_register_parameters_c) { - *_fp_args++ = from_obj; - *_fp_identifiers |= (1ull << _num_fp_args); // mark as double - _num_fp_args++; - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // ld/st from_obj as integer, no need to mark _fp_identifiers - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_fp_args++; + virtual void pass_float() { + jint value = *(jint*) single_slot_addr(); + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { + *_fp_identifiers |= (1ull << arg); // mark as double + } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack + pass_stack(value); } } @@ -261,12 +260,13 @@ class SlowSignatureHandler _to = to; _int_args = to - (method->is_static() ? 16 : 17); - _fp_args = to - 8; + _fp_args = to - 8; _fp_identifiers = to - 9; *(int*) _fp_identifiers = 0; - _num_int_args = (method->is_static() ? 1 : 0); - _num_fp_args = 0; + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; } + ~SlowSignatureHandler() { _from = NULL; diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp index d56896fefd8..05df63ba2ae 100644 --- a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -35,8 +35,8 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { private: MacroAssembler* _masm; - unsigned int _num_fp_args; - unsigned int _num_int_args; + unsigned int _num_reg_fp_args; + unsigned int _num_reg_int_args; int _stack_offset; void pass_int(); @@ -45,6 +45,10 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { void pass_double(); void pass_object(); + Register next_gpr(); + FloatRegister next_fpr(); + int next_stack_offset(); + public: // Creation SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp index 76610084f75..5a0c9b812fc 100644 --- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -30,7 +29,7 @@ private: // FP value associated with _last_Java_sp: - intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to public: // Each arch must define reset, save, restore @@ -80,10 +79,11 @@ public: - void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } - intptr_t* last_Java_fp(void) { return _last_Java_fp; } // Assert (last_Java_sp == NULL || fp == NULL) - void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } + void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp index a0c5b7be125..f6e7351c4fc 100644 --- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { // An even value means there are no ongoing safepoint operations __ andi(t0, rcounter, 1); __ bnez(t0, slow); - - if (JvmtiExport::can_post_field_access()) { - // Using barrier to order wrt. JVMTI check and load of result. - __ membar(MacroAssembler::LoadLoad); - - // Check to see if a field access watch has been set before we - // take the fast path. - int32_t offset2; - __ la_patchable(result, - ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), - offset2); - __ lwu(result, Address(result, offset2)); - __ bnez(result, slow); - - __ mv(robj, c_rarg1); - } else { - // Using address dependency to order wrt. load of result. 
- __ xorr(robj, c_rarg1, rcounter); - __ xorr(robj, robj, rcounter); // obj, since - // robj ^ rcounter ^ rcounter == robj - // robj is address dependent on rcounter. - } + __ xorr(robj, c_rarg1, rcounter); + __ xorr(robj, robj, rcounter); // obj, since + // robj ^ rcounter ^ rcounter == robj + // robj is address dependent on rcounter. // Both robj and t0 are clobbered by try_resolve_jobject_in_native. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { default: ShouldNotReachHere(); } - // Using acquire: Order JVMTI check and load of result wrt. succeeding check - // (LoadStore for volatile field). - __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - + __ xorr(rcounter_addr, rcounter_addr, result); + __ xorr(rcounter_addr, rcounter_addr, result); __ lw(t0, safepoint_counter_addr); __ bne(rcounter, t0, slow); @@ -172,7 +152,6 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { int32_t tmp_offset = 0; __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); __ jalr(x1, t0, tmp_offset); - __ ifence(); __ leave(); __ ret(); } diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp index a0c0cebf41a..df3c0267eea 100644 --- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,7 +27,6 @@ #define CPU_RISCV_JNITYPES_RISCV_HPP #include "jni.h" -#include "memory/allocation.hpp" #include "oops/oop.hpp" // This file holds platform-dependent routines used to write primitive jni @@ -67,9 +65,9 @@ class JNITypes : private AllStatic { } // Oops are stored in native format in one JavaCallArgument slot at *to. - static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } - static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } - static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } // Floats are stored in native format in one JavaCallArgument slot at *to. static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 3406d29ed23..f35f3a86797 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -49,6 +49,7 @@ #include "runtime/thread.hpp" #ifdef COMPILER2 #include "opto/compile.hpp" +#include "opto/intrinsicnode.hpp" #include "opto/node.hpp" #include "opto/output.hpp" #endif @@ -88,8 +89,9 @@ static void pass_arg3(MacroAssembler* masm, Register arg) { } } -void MacroAssembler::align(int modulus) { - while (offset() % modulus != 0) { nop(); } +void MacroAssembler::align(int modulus, int extra_offset) { + CompressibleRegion cr(this); + while ((offset() + extra_offset) % modulus != 0) { nop(); } } void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { @@ -190,6 +192,22 @@ void MacroAssembler::call_VM(Register oop_result, void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} void MacroAssembler::check_and_handle_popframe(Register java_thread) {} +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + + // load indirectly to solve generation ordering problem + ld(tmp, ExternalAddress((address) delayed_value_addr)); + + if (offset != 0) + add(tmp, tmp, offset); + + return RegisterOrConstant(tmp); +} + // Calls to C land // // When entering C land, the fp, & esp of the last Java frame have to be recorded @@ -198,7 +216,7 @@ void MacroAssembler::check_and_handle_popframe(Register java_thread) {} void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, - Register temp) { + Register tmp) { if (last_java_pc->is_valid()) { sd(last_java_pc, Address(xthread, @@ -208,8 +226,8 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, // determine last_java_sp register if (last_java_sp == sp) { - mv(temp, sp); - last_java_sp = temp; + mv(tmp, sp); + last_java_sp = tmp; } else if 
(!last_java_sp->is_valid()) { last_java_sp = esp; } @@ -225,25 +243,49 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, - Register temp) { + Register tmp) { assert(last_java_pc != NULL, "must provide a valid PC"); - la(temp, last_java_pc); - sd(temp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + la(tmp, last_java_pc); + sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); - set_last_Java_frame(last_java_sp, last_java_fp, noreg, temp); + set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); } void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &L, - Register temp) { + Register tmp) { if (L.is_bound()) { - set_last_Java_frame(last_java_sp, last_java_fp, target(L), temp); + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); } else { InstructionMark im(this); L.add_patch_at(code(), locator()); - set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } +} + +// Just like safepoint_poll, but use an acquiring load for thread- +// local polling. +// +// We need an acquire here to ensure that any subsequent load of the +// global SafepointSynchronize::_state flag is ordered after this load +// of the local Thread::_polling page. We don't want this poll to +// return false (i.e. not safepointing) and a later poll of the global +// SafepointSynchronize::_state spuriously to return true. +// +// This is to avoid a race when we're in a native->Java transition +// racing the code which wakes up from a safepoint. 
+// +void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + membar(MacroAssembler::AnyAny); + ld(t1, Address(xthread, Thread::polling_page_offset())); + membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + andi(t0, t1, SafepointMechanism::poll_bit()); + bnez(t0, slow_path); + } else { + safepoint_poll(slow_path); } } @@ -344,14 +386,13 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { } BLOCK_COMMENT("verify_oop {"); - push_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); mv(c_rarg0, reg); // c_rarg0 : x10 - if(b != NULL) { - li(t0, (uintptr_t)(address)b); - } else { - ShouldNotReachHere(); - } + // The length of the instruction sequence emitted should be independent + // of the values of the local char buffer address so that the size of mach + // nodes for scratch emit and normal emit matches. + mv(t0, (address)b); // call indirectly to solve generation ordering problem int32_t offset = 0; @@ -359,7 +400,7 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { ld(t1, Address(t1, offset)); jalr(t1); - pop_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); BLOCK_COMMENT("} verify_oop"); } @@ -378,7 +419,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } BLOCK_COMMENT("verify_oop_addr {"); - push_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); if (addr.uses(sp)) { la(x10, addr); @@ -386,11 +427,11 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } else { ld(x10, addr); } - if(b != NULL) { - li(t0, (uintptr_t)(address)b); - } else { - ShouldNotReachHere(); - } + + // The length of the instruction sequence emitted should be independent + // of the values of the local char buffer address so that the size of mach + // nodes for scratch emit and normal emit matches. 
+ mv(t0, (address)b); // call indirectly to solve generation ordering problem int32_t offset = 0; @@ -398,7 +439,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ld(t1, Address(t1, offset)); jalr(t1); - pop_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); BLOCK_COMMENT("} verify_oop_addr"); } @@ -416,8 +457,7 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, return Address(esp, arg_slot.as_constant() * stackElementSize + offset); } else { assert_different_registers(t0, arg_slot.as_register()); - slli(t0, arg_slot.as_register(), exact_log2(stackElementSize)); - add(t0, esp, t0); + shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); return Address(t0, offset); } } @@ -480,12 +520,8 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) tty->print_cr("x31 = 0x%016lx", regs[31]); BREAKPOINT; } - ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); - } else { - ttyLocker ttyl; - ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); - assert(false, "DEBUG MESSAGE: %s", msg); } + fatal("DEBUG MESSAGE: %s", msg); } void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { @@ -512,12 +548,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t void MacroAssembler::stop(const char* msg) { address ip = pc(); pusha(); - if(msg != NULL && ip != NULL) { - li(c_rarg0, (uintptr_t)(address)msg); - li(c_rarg1, (uintptr_t)(address)ip); - } else { - ShouldNotReachHere(); - } + // The length of the instruction sequence emitted should be independent + // of the values of msg and ip so that the size of mach nodes for scratch + // emit and normal emit matches. 
+ mv(c_rarg0, (address)msg); + mv(c_rarg1, (address)ip); mv(c_rarg2, sp); mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); jalr(c_rarg3); @@ -540,7 +575,6 @@ void MacroAssembler::emit_static_call_stub() { // exact layout of this stub. ifence(); - mov_metadata(xmethod, (Metadata*)NULL); // Jump to the entry point of the i2c stub. @@ -548,11 +582,11 @@ void MacroAssembler::emit_static_call_stub() { movptr_with_offset(t0, 0, offset); jalr(x0, t0, offset); } + void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments, Label *retaddr) { call_native_base(entry_point, retaddr); - ifence(); } void MacroAssembler::call_native(address entry_point, Register arg_0) { @@ -658,6 +692,10 @@ void MacroAssembler::sext_w(Register Rd, Register Rs) { addiw(Rd, Rs, 0); } +void MacroAssembler::zext_b(Register Rd, Register Rs) { + andi(Rd, Rs, 0xFF); +} + void MacroAssembler::seqz(Register Rd, Register Rs) { sltiu(Rd, Rs, 1); } @@ -702,6 +740,18 @@ void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { fsgnjn_d(Rd, Rs, Rs); } +void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { + vmnand_mm(vd, vs, vs); +} + +void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { + vnsrl_wx(vd, vs, x0, vm); +} + +void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { + vfsgnjn_vv(vd, vs, vs); +} + void MacroAssembler::la(Register Rd, const address &dest) { int64_t offset = dest - pc(); if (is_offset_in_range(offset, 32)) { @@ -717,7 +767,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { code_section()->relocate(inst_mark(), adr.rspec()); relocInfo::relocType rtype = adr.rspec().reloc()->type(); - switch(adr.getMode()) { + switch (adr.getMode()) { case Address::literal: { if (rtype == relocInfo::none) { li(Rd, (intptr_t)(adr.target())); @@ -726,7 +776,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { } break; } - case Address::base_plus_offset:{ + case 
Address::base_plus_offset: { int32_t offset = 0; baseOffset(Rd, adr, offset); addi(Rd, Rd, offset); @@ -741,13 +791,13 @@ void MacroAssembler::la(Register Rd, Label &label) { la(Rd, target(label)); } -#define INSN(NAME) \ - void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ - NAME(Rs, zr, dest); \ - } \ - void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ - NAME(Rs, zr, l, is_far); \ - } \ +#define INSN(NAME) \ + void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ + NAME(Rs, zr, dest); \ + } \ + void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ + NAME(Rs, zr, l, is_far); \ + } \ INSN(beq); INSN(bne); @@ -760,14 +810,14 @@ void MacroAssembler::la(Register Rd, Label &label) { // Float compare branch instructions -#define INSN(NAME, FLOATCMP, BRANCH) \ - void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ - FLOATCMP##_s(t0, Rs1, Rs2); \ - BRANCH(t0, l, is_far); \ - } \ - void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ - FLOATCMP##_d(t0, Rs1, Rs2); \ - BRANCH(t0, l, is_far); \ +#define INSN(NAME, FLOATCMP, BRANCH) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_s(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_d(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ } INSN(beq, feq, bnez); @@ -776,30 +826,30 @@ void MacroAssembler::la(Register Rd, Label &label) { #undef INSN -#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ - void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - if(is_unordered) { \ - /* jump if either source is NaN or condition is expected */ \ - FLOATCMP2##_s(t0, Rs2, Rs1); \ - 
beqz(t0, l, is_far); \ - } else { \ - /* jump if no NaN in source and condition is expected */ \ - FLOATCMP1##_s(t0, Rs1, Rs2); \ - bnez(t0, l, is_far); \ - } \ - } \ - void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - if(is_unordered) { \ - /* jump if either source is NaN or condition is expected */ \ - FLOATCMP2##_d(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ - } else { \ - /* jump if no NaN in source and condition is expected */ \ - FLOATCMP1##_d(t0, Rs1, Rs2); \ - bnez(t0, l, is_far); \ - } \ +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ } INSN(ble, fle, flt); @@ -807,14 +857,14 @@ void MacroAssembler::la(Register Rd, Label &label) { #undef INSN -#define INSN(NAME, CMP) \ - void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ - } \ - void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ +#define INSN(NAME, CMP) \ + void 
MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ } INSN(bgt, blt); @@ -904,102 +954,6 @@ void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { #undef INSN -#ifdef COMPILER2 -typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, - bool is_far, bool is_unordered); - -static conditional_branch_insn conditional_branches[] = -{ - /* SHORT branches */ - (conditional_branch_insn)&Assembler::beq, - (conditional_branch_insn)&Assembler::bgt, - NULL, // BoolTest::overflow - (conditional_branch_insn)&Assembler::blt, - (conditional_branch_insn)&Assembler::bne, - (conditional_branch_insn)&Assembler::ble, - NULL, // BoolTest::no_overflow - (conditional_branch_insn)&Assembler::bge, - - /* UNSIGNED branches */ - (conditional_branch_insn)&Assembler::beq, - (conditional_branch_insn)&Assembler::bgtu, - NULL, - (conditional_branch_insn)&Assembler::bltu, - (conditional_branch_insn)&Assembler::bne, - (conditional_branch_insn)&Assembler::bleu, - NULL, - (conditional_branch_insn)&Assembler::bgeu -}; - -static float_conditional_branch_insn float_conditional_branches[] = -{ - /* FLOAT SHORT branches */ - (float_conditional_branch_insn)&MacroAssembler::float_beq, - (float_conditional_branch_insn)&MacroAssembler::float_bgt, - NULL, // BoolTest::overflow - (float_conditional_branch_insn)&MacroAssembler::float_blt, - (float_conditional_branch_insn)&MacroAssembler::float_bne, - (float_conditional_branch_insn)&MacroAssembler::float_ble, - NULL, // BoolTest::no_overflow - (float_conditional_branch_insn)&MacroAssembler::float_bge, 
- - /* DOUBLE SHORT branches */ - (float_conditional_branch_insn)&MacroAssembler::double_beq, - (float_conditional_branch_insn)&MacroAssembler::double_bgt, - NULL, - (float_conditional_branch_insn)&MacroAssembler::double_blt, - (float_conditional_branch_insn)&MacroAssembler::double_bne, - (float_conditional_branch_insn)&MacroAssembler::double_ble, - NULL, - (float_conditional_branch_insn)&MacroAssembler::double_bge -}; - -void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { - assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), - "invalid conditional branch index"); - (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -} - -// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { - assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), - "invalid float conditional branch index"); - int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); - (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, - (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); -} - -void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { - switch (cmpFlag) { - case BoolTest::eq: - case BoolTest::le: - beqz(op1, L, is_far); - break; - case BoolTest::ne: - case BoolTest::gt: - bnez(op1, L, is_far); - break; - default: - ShouldNotReachHere(); - } -} - -void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { - switch (cmpFlag) { - case BoolTest::eq: - beqz(op1, L, is_far); - break; - case BoolTest::ne: - bnez(op1, L, is_far); - break; - default: - ShouldNotReachHere(); - } -} -#endif // COMPILER2 - void MacroAssembler::push_reg(Register Rs) { addi(esp, esp, 0 - wordSize); @@ -1013,22 +967,14 @@ void MacroAssembler::pop_reg(Register Rd) } int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { - DEBUG_ONLY(int words_pushed = 0;) - int count = 0; - // Sp is x2, and zr is x0, which should not be pushed. - // If the number of registers is odd, zr is used for stack alignment.Otherwise, it will be ignored. - bitset &= ~ (1U << 2); - bitset |= 0x1; - // Scan bitset to accumulate register pairs - for (int reg = 31; reg >= 0; reg --) { + for (int reg = 31; reg >= 0; reg--) { if ((1U << 31) & bitset) { regs[count++] = reg; } bitset <<= 1; } - count &= ~1; // Only push an even number of regs return count; } @@ -1036,15 +982,18 @@ int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { // Return the number of words pushed int MacroAssembler::push_reg(unsigned int bitset, Register stack) { DEBUG_ONLY(int words_pushed = 0;) + CompressibleRegion cr(this); unsigned char regs[32]; int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 
0 : wordSize; if (count) { - addi(stack, stack, - count * wordSize); + addi(stack, stack, - count * wordSize - offset); } for (int i = count - 1; i >= 0; i--) { - sd(as_Register(regs[i]), Address(stack, (count -1 - i) * wordSize)); + sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); DEBUG_ONLY(words_pushed ++;) } @@ -1055,42 +1004,33 @@ int MacroAssembler::push_reg(unsigned int bitset, Register stack) { int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { DEBUG_ONLY(int words_popped = 0;) + CompressibleRegion cr(this); unsigned char regs[32]; int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 0 : wordSize; for (int i = count - 1; i >= 0; i--) { - ld(as_Register(regs[i]), Address(stack, (count -1 - i) * wordSize)); + ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); DEBUG_ONLY(words_popped ++;) } if (count) { - addi(stack, stack, count * wordSize); + addi(stack, stack, count * wordSize + offset); } assert(words_popped == count, "oops, popped != count"); return count; } -int MacroAssembler::bitset_to_fregs(unsigned int bitset, unsigned char* regs) { - int count = 0; - // Scan bitset to accumulate register pairs - for (int reg = 31; reg >= 0; reg--) { - if ((1U << 31) & bitset) { - regs[count++] = reg; - } - bitset <<= 1; - } - - return count; -} - // Push float registers in the bitset, except sp. // Return the number of heapwords pushed. 
int MacroAssembler::push_fp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); int words_pushed = 0; unsigned char regs[32]; - int count = bitset_to_fregs(bitset, regs); + int count = bitset_to_regs(bitset, regs); int push_slots = count + (count & 1); if (count) { @@ -1107,9 +1047,10 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { } int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); int words_popped = 0; unsigned char regs[32]; - int count = bitset_to_fregs(bitset, regs); + int count = bitset_to_regs(bitset, regs); int pop_slots = count + (count & 1); for (int i = count - 1; i >= 0; i--) { @@ -1125,18 +1066,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { return count; } -void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { - vmnand_mm(vd, vs, vs); -} - -void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { - vnsrl_wx(vd, vs, x0, vm); -} - -void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { - vfsgnjn_vv(vd, vs, vs); -} - // CSky specific ldd/lwd/lwud/swd/sdd to merge 2 load or 2 store instructions // Checks whether current and previous load/store can be merged. // Returns true if it can be merged, else false. @@ -1331,6 +1260,7 @@ void MacroAssembler::sw(Register Rw, const Address& adr) { } void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); // Push integer registers x7, x10-x17, x28-x31. 
push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); @@ -1345,6 +1275,7 @@ void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { } void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); int offset = 0; for (int i = 0; i < 32; i++) { if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { @@ -1356,18 +1287,22 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); } -// Push all the integer registers, except zr(x0) & sp(x2). +// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). void MacroAssembler::pusha() { - push_reg(0xfffffffa, sp); + CompressibleRegion cr(this); + push_reg(0xffffffe2, sp); } +// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). void MacroAssembler::popa() { - pop_reg(0xfffffffa, sp); + CompressibleRegion cr(this); + pop_reg(0xffffffe2, sp); } void MacroAssembler::push_CPU_state() { - // integer registers, except zr(x0) & ra(x1) & sp(x2) - push_reg(0xfffffff8, sp); + CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + push_reg(0xffffffe0, sp); // float registers addi(sp, sp, - 32 * wordSize); @@ -1377,14 +1312,16 @@ void MacroAssembler::push_CPU_state() { } void MacroAssembler::pop_CPU_state() { + CompressibleRegion cr(this); + // float registers for (int i = 0; i < 32; i++) { fld(as_FloatRegister(i), Address(sp, i * wordSize)); } addi(sp, sp, 32 * wordSize); - // integer registers, except zr(x0) & ra(x1) & sp(x2) - pop_reg(0xfffffff8, sp); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + pop_reg(0xffffffe0, sp); } static int patch_offset_in_jal(address branch, int64_t offset) { @@ -1534,10 +1471,14 @@ int MacroAssembler::pd_patch_instruction_size(address 
branch, address target) { int64_t imm = (intptr_t)target; return patch_imm_in_li32(branch, (int32_t)imm); } else { - tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); +#ifdef ASSERT + tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", + *(unsigned*)branch, p2i(branch)); + Disassembler::decode(branch - 16, branch + 16); +#endif ShouldNotReachHere(); + return -1; } - return -1; } address MacroAssembler::target_addr_for_insn(address insn_addr) { @@ -1567,7 +1508,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { // instruction. if (NativeInstruction::is_li32_at(insn_addr)) { // Move narrow OOP - narrowOop n = CompressedOops::encode(cast_to_oop(o)); + narrowOop n = CompressedOops::encode((oop)o); return patch_imm_in_li32(insn_addr, (int32_t)n); } else if (NativeInstruction::is_movptr_at(insn_addr)) { // Move wide OOP @@ -1580,11 +1521,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { if (Universe::is_fully_initialized()) { - if (Universe::narrow_ptrs_base() == NULL) { - li(xheapbase, 0); - } else { - mv(xheapbase, Universe::narrow_ptrs_base()); - } + mv(xheapbase, Universe::narrow_ptrs_base()); } else { int32_t offset = 0; la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); @@ -1593,18 +1530,6 @@ void MacroAssembler::reinit_heapbase() { } } -void MacroAssembler::mv(Register Rd, int64_t imm64) { - li(Rd, imm64); -} - -void MacroAssembler::mv(Register Rd, int imm) { - mv(Rd, (int64_t)imm); -} - -void MacroAssembler::mvw(Register Rd, int32_t imm32) { - mv(Rd, imm32); -} - void MacroAssembler::mv(Register Rd, Address dest) { assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); code_section()->relocate(pc(), dest.rspec()); @@ -1612,7 +1537,7 @@ void MacroAssembler::mv(Register Rd, Address 
dest) { } void MacroAssembler::mv(Register Rd, address addr) { - // Here in case of use with relocation, use fix length instruciton + // Here in case of use with relocation, use fix length instruction // movptr instead of li movptr(Rd, addr); } @@ -1691,136 +1616,164 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in } } -void MacroAssembler::reverseb16(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // This method is only used for grev16 - // Rd = Rs[47:0] Rs[55:48] Rs[63:56] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1); - srli(Rtmp1, Rs, 48); - andi(Rtmp2, Rtmp1, 0xff); - slli(Rtmp2, Rtmp2, 8); - srli(Rtmp1, Rtmp1, 8); - orr(Rtmp1, Rtmp1, Rtmp2); - slli(Rd, Rs, 16); - orr(Rd, Rd, Rtmp1); -} - -void MacroAssembler::reverseh32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // This method is only used for grev32 - // Rd[63:0] = Rs[31:0] Rs[47:32] Rs[63:48] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1); - srli(Rtmp1, Rs, 32); - slli(Rtmp2, Rtmp1, 48); - srli(Rtmp2, Rtmp2, 32); - srli(Rtmp1, Rtmp1, 16); - orr(Rtmp1, Rtmp1, Rtmp2); - slli(Rd, Rs, 32); - orr(Rd, Rd, Rtmp1); -} - -void MacroAssembler::grevh(Register Rd, Register Rs, Register Rtmp) { - // Reverse bytes in half-word - // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) - assert_different_registers(Rs, Rtmp); - assert_different_registers(Rd, Rtmp); - srli(Rtmp, Rs, 8); - andi(Rtmp, Rtmp, 0xFF); +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); slli(Rd, Rs, 56); srai(Rd, Rd, 48); // sign-extend - orr(Rd, Rd, Rtmp); + orr(Rd, Rd, tmp); 
} -void MacroAssembler::grevhu(Register Rd, Register Rs, Register Rtmp) { - // Reverse bytes in half-word - // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) - assert_different_registers(Rs, Rtmp); - assert_different_registers(Rd, Rtmp); - srli(Rtmp, Rs, 8); - andi(Rtmp, Rtmp, 0xFF); - andi(Rd, Rs, 0xFF); - slli(Rd, Rd, 8); - orr(Rd, Rd, Rtmp); -} - -void MacroAssembler::grev16w(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in half-word (32bit) - // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (sign-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - srli(Rtmp2, Rs, 16); - grevh(Rtmp2, Rtmp2, Rtmp1); - grevhu(Rd, Rs, Rtmp1); - slli(Rtmp2, Rtmp2, 16); - orr(Rd, Rd, Rtmp2); -} - -void MacroAssembler::grev16wu(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in half-word (32bit) - // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - srli(Rtmp2, Rs, 16); - grevhu(Rtmp2, Rtmp2, Rtmp1); - grevhu(Rd, Rs, Rtmp1); - slli(Rtmp2, Rtmp2, 16); - orr(Rd, Rd, Rtmp2); -} - -void MacroAssembler::grevw(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in word (32bit) - // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev16wu(Rd, Rs, Rtmp1, Rtmp2); - slli(Rtmp2, Rd, 48); - srai(Rtmp2, Rtmp2, 32); // sign-extend +// reverse bytes in lower word and sign-extend +// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) +void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + 
revb_h_w_u(Rd, Rs, tmp1, tmp2); + slli(tmp2, Rd, 48); + srai(tmp2, tmp2, 32); // sign-extend srli(Rd, Rd, 16); - orr(Rd, Rd, Rtmp2); + orr(Rd, Rd, tmp2); } -void MacroAssembler::grevwu(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in word (32bit) - // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (zero-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev16wu(Rd, Rs, Rtmp1, Rtmp2); - slli(Rtmp2, Rd, 48); - srli(Rtmp2, Rtmp2, 32); - srli(Rd, Rd, 16); - orr(Rd, Rd, Rtmp2); +// reverse bytes in halfword in lower 16 bits and zero-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srli(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); + andi(Rd, Rs, 0xFF); + slli(Rd, Rd, 8); + orr(Rd, Rd, tmp); } -void MacroAssembler::grev16(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in half-word (64bit) - // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - reverseb16(Rd, Rs, Rtmp1, Rtmp2); +// reverse bytes in halfwords in lower 32 bits and zero-extend +// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + srli(tmp2, Rs, 16); + revb_h_h_u(tmp2, tmp2, tmp1); + revb_h_h_u(Rd, Rs, tmp1); + slli(tmp2, tmp2, 16); + orr(Rd, Rd, tmp2); +} + +// This method is only used for revb_h +// Rd = Rs[47:0] 
Rs[55:48] Rs[63:56] +void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1); + srli(tmp1, Rs, 48); + andi(tmp2, tmp1, 0xFF); + slli(tmp2, tmp2, 8); + srli(tmp1, tmp1, 8); + orr(tmp1, tmp1, tmp2); + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} + +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + assert_different_registers(Rs, tmp1); + assert_different_registers(Rd, tmp1); + rev8(Rd, Rs); + zext_w(tmp1, Rd); + roriw(tmp1, tmp1, 16); + slli(tmp1, tmp1, 32); + srli(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + orr(Rd, Rd, tmp1); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_helper(Rd, Rs, tmp1, tmp2); for (int i = 0; i < 3; ++i) { - reverseb16(Rd, Rd, Rtmp1, Rtmp2); + revb_h_helper(Rd, Rd, tmp1, tmp2); } } -void MacroAssembler::grev32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in word (64bit) - // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev16(Rd, Rs, Rtmp1, Rtmp2); - reverseh32(Rd, Rd, Rtmp1, Rtmp2); - reverseh32(Rd, Rd, Rtmp1, Rtmp2); +// reverse bytes in each word +// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] +void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb(Rd, Rs, tmp1, tmp2); + ror_imm(Rd, Rd, 32); +} + +// reverse bytes in doubleword +// Rd[63:0] = Rs[7:0] Rs[15:8] 
Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] +void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + andi(tmp1, Rs, 0xFF); + slli(tmp1, tmp1, 8); + for (int step = 8; step < 56; step += 8) { + srli(tmp2, Rs, step); + andi(tmp2, tmp2, 0xFF); + orr(tmp1, tmp1, tmp2); + slli(tmp1, tmp1, 8); + } + srli(Rd, Rs, 56); + andi(Rd, Rd, 0xFF); + orr(Rd, tmp1, Rd); } -void MacroAssembler::grev(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in double-word (64bit) - // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev32(Rd, Rs, Rtmp1, Rtmp2); - slli(Rtmp2, Rd, 32); - srli(Rd, Rd, 32); - orr(Rd, Rd, Rtmp2); +// rotate right with shift bits +void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) +{ + if (UseRVB) { + rori(dst, src, shift); + return; + } + + assert_different_registers(dst, tmp); + assert_different_registers(src, tmp); + assert(shift < 64, "shift amount must be < 64"); + slli(tmp, src, 64 - shift); + srli(dst, src, shift); + orr(dst, dst, tmp); } void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { @@ -1838,7 +1791,7 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R if (src.is_register()) { orr(tmp1, tmp1, src.as_register()); } else { - if(is_imm_in_range(src.as_constant(), 12, 0)) { + if (is_imm_in_range(src.as_constant(), 12, 0)) { ori(tmp1, tmp1, src.as_constant()); } else { assert_different_registers(tmp1, tmp2); @@ -1856,11 +1809,6 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, slli(tmp, tmp, Universe::narrow_klass_shift()); beq(trial_klass, tmp, L); return; - } else if 
(((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 - && Universe::narrow_klass_shift() == 0) { - // Only the bottom 32 bits matter - beq(trial_klass, tmp, L); - return; } decode_klass_not_null(tmp); } else { @@ -1869,10 +1817,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, beq(trial_klass, tmp, L); } -// Move an oop into a register. immediate is true if we want -// immediate instructions and nmethod entry barriers are not enabled. -// i.e. we are not going to patch this instruction while the code is being -// executed by another thread. +// Move an oop into a register. immediate is true if we want +// immediate instructions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { int oop_index; if (obj == NULL) { @@ -2077,16 +2025,16 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register assert_different_registers(src, xbase); li(xbase, (uintptr_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); assert_different_registers(t0, xbase); - slli(t0, src, LogKlassAlignmentInBytes); - add(dst, xbase, t0); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); } else { add(dst, xbase, src); } - if (xbase == xheapbase) { reinit_heapbase(); } + if (xbase == xheapbase) { reinit_heapbase(); } } void MacroAssembler::encode_klass_not_null(Register r) { @@ -2108,7 +2056,7 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && Universe::narrow_klass_shift() == 0) { - zero_ext(dst, src, 32); // clear upper 32 bits + zero_extend(dst, src, 32); return; } @@ -2160,8 +2108,7 @@ void 
MacroAssembler::decode_heap_oop(Register d, Register s) { Label done; mv(d, s); beqz(s, done); - slli(d, s, LogMinObjAlignmentInBytes); - add(d, xheapbase, d); + shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); bind(done); } verify_oop(d, "broken oop in decode_heap_oop"); @@ -2243,11 +2190,11 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass, RegisterOrConstant itable_index, Register method_result, - Register scan_temp, + Register scan_tmp, Label& L_no_such_interface, bool return_method) { - assert_different_registers(recv_klass, intf_klass, scan_temp); - assert_different_registers(method_result, intf_klass, scan_temp); + assert_different_registers(recv_klass, intf_klass, scan_tmp); + assert_different_registers(method_result, intf_klass, scan_tmp); assert(recv_klass != method_result || !return_method, "recv_klass can be destroyed when mehtid isn't needed"); assert(itable_index.is_constant() || itable_index.as_register() == method_result, @@ -2260,12 +2207,11 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, int vte_size = vtableEntry::size_in_bytes(); assert(vte_size == wordSize, "else adjust times_vte_scale"); - lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); + lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); // %%% Could store the aligned, prescaled offset in the klassoop. - slli(scan_temp, scan_temp, 3); - add(scan_temp, recv_klass, scan_temp); - add(scan_temp, scan_temp, vtable_base); + shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); + add(scan_tmp, scan_tmp, vtable_base); if (return_method) { // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
@@ -2283,23 +2229,23 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, Label search, found_method; - ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); beq(intf_klass, method_result, found_method); bind(search); // Check that the previous entry is non-null. A null entry means that // the receiver class doens't implement the interface, and wasn't the // same as when the caller was compiled. beqz(method_result, L_no_such_interface, /* is_far */ true); - addi(scan_temp, scan_temp, scan_step); - ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + addi(scan_tmp, scan_tmp, scan_step); + ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); bne(intf_klass, method_result, search); bind(found_method); // Got a hit. if (return_method) { - lwu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); - add(method_result, recv_klass, scan_temp); + lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); + add(method_result, recv_klass, scan_tmp); ld(method_result, Address(method_result)); } } @@ -2314,8 +2260,7 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass, int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); if (vtable_index.is_register()) { - slli(method_result, vtable_index.as_register(), LogBytesPerWord); - add(method_result, recv_klass, method_result); + shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); ld(method_result, Address(method_result, vtable_offset_in_bytes)); } else { vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; @@ -2324,8 +2269,6 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass, } void MacroAssembler::membar(uint32_t order_constraint) { - if (!os::is_MP()) { return; } - address prev = pc() - 
NativeMembar::instruction_size; address last = code()->last_insn(); @@ -2363,29 +2306,14 @@ Address MacroAssembler::form_address(Register Rd, Register base, long byte_offse void MacroAssembler::check_klass_subtype(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label& L_success) { Label L_failure; - check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); - check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); bind(L_failure); } -// Write serialization page so VM thread can do a pseudo remote membar. -// We use the current thread pointer to calculate a thread specific -// offset to write to within the page. This minimizes bus traffic -// due to cache line collision. -void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { - srli(tmp2, thread, os::get_serialize_page_shift_count()); - - int mask = os::vm_page_size() - sizeof(int); - andi(tmp2, tmp2, mask, tmp1); - - add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); - membar(MacroAssembler::AnyAny); - sw(zr, Address(tmp1)); -} - void MacroAssembler::safepoint_poll(Label& slow_path) { if (SafepointMechanism::uses_thread_local_poll()) { ld(t1, Address(xthread, Thread::polling_page_offset())); @@ -2400,30 +2328,6 @@ void MacroAssembler::safepoint_poll(Label& slow_path) { } } -// Just like safepoint_poll, but use an acquiring load for thread- -// local polling. -// -// We need an acquire here to ensure that any subsequent load of the -// global SafepointSynchronize::_state flag is ordered after this load -// of the local Thread::_polling page. We don't want this poll to -// return false (i.e. 
not safepointing) and a later poll of the global -// SafepointSynchronize::_state spuriously to return true. -// -// This is to avoid a race when we're in a native->Java transition -// racing the code which wakes up from a safepoint. -// -void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { - if (SafepointMechanism::uses_thread_local_poll()) { - membar(MacroAssembler::AnyAny); - ld(t1, Address(xthread, Thread::polling_page_offset())); - membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - andi(t0, t1, SafepointMechanism::poll_bit()); - bnez(t0, slow_path); - } else { - safepoint_poll(slow_path); - } -} - void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) { // oldv holds comparison value @@ -2431,17 +2335,16 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg // addr identifies memory word to compare against/update Label retry_load, nope; bind(retry_load); - // flush and load exclusive from the memory location - // and fail if it is not what we expect + // Load reserved from the memory location lr_d(tmp, addr, Assembler::aqrl); + // Fail and exit if it is not what we expect bne(tmp, oldv, nope); - // if we store+flush with no intervening write tmp wil be zero + // If the store conditional succeeds, tmp will be zero sc_d(tmp, newv, addr, Assembler::rl); beqz(tmp, succeed); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. 
+ // Retry only when the store conditional failed j(retry_load); - // if the memory word differs we return it in oldv and signal a fail + bind(nope); membar(AnyAny); mv(oldv, tmp); @@ -2468,7 +2371,7 @@ void MacroAssembler::load_reserved(Register addr, break; case uint32: lr_w(t0, addr, acquire); - clear_upper_bits(t0, 32); + zero_extend(t0, t0, 32); break; default: ShouldNotReachHere(); @@ -2509,8 +2412,9 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte if (size == int8) { addi(mask, zr, 0xff); } else { + // size == int16 case addi(mask, zr, -1); - zero_ext(mask, mask, registerSize - 16); + zero_extend(mask, mask, 16); } sll(mask, mask, shift); @@ -2563,9 +2467,10 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, srl(result, tmp, shift); if (size == int8) { - sign_ext(result, result, registerSize - 8); - } else if (size == int16) { - sign_ext(result, result, registerSize - 16); + sign_extend(result, result, 8); + } else { + // size == int16 case + sign_extend(result, result, 16); } } } @@ -2695,7 +2600,7 @@ ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) #define ATOMIC_XCHGU(OP1, OP2) \ void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ atomic_##OP2(prev, newv, addr); \ - clear_upper_bits(prev, 32); \ + zero_extend(prev, prev, 32); \ return; \ } @@ -2704,228 +2609,6 @@ ATOMIC_XCHGU(xchgalwu, xchgalw) #undef ATOMIC_XCHGU -void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done, Register flag) { - assert(UseBiasedLocking, "why call this otherwise?"); - - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. 
- ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 - sub(temp_reg, temp_reg, (u1)markOopDesc::biased_lock_pattern); - if (flag->is_valid()) { mv(flag, temp_reg); } - beqz(temp_reg, done); -} - -void MacroAssembler::load_prototype_header(Register dst, Register src) { - load_klass(dst, src); - ld(dst, Address(dst, Klass::prototype_header_offset())); -} - -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters, - Register flag) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert_different_registers(lock_reg, obj_reg, swap_reg); - - if (PrintBiasedLockingStatistics && counters == NULL) { - counters = BiasedLocking::counters(); - } - - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - ld(swap_reg, mark_addr); - } - andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); - xori(t0, tmp_reg, (u1)markOopDesc::biased_lock_pattern); - bnez(t0, cas_label); // don't care flag unless jumping to done - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. 
- load_prototype_header(tmp_reg, obj_reg); - orr(tmp_reg, tmp_reg, xthread); - xorr(tmp_reg, swap_reg, tmp_reg); - andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); - if (flag->is_valid()) { - mv(flag, tmp_reg); - } - - if (counters != NULL) { - Label around; - bnez(tmp_reg, around); - atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); - j(done); - bind(around); - } else { - beqz(tmp_reg, done); - } - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); - bnez(t0, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); - bnez(t0, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. 
- // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - { - Label cas_success; - Label counter; - li(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); - andr(swap_reg, swap_reg, t0); - orr(tmp_reg, swap_reg, xthread); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); - // cas failed here if slow_cass == NULL - if (flag->is_valid()) { - li(flag, 1); - j(counter); - } - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - bind(cas_success); - if (flag->is_valid()) { - li(flag, 0); - bind(counter); - } - - if (counters != NULL) { - atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), - tmp_reg, t0); - } - } - j(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. 
- { - Label cas_success; - Label counter; - load_prototype_header(tmp_reg, obj_reg); - orr(tmp_reg, xthread, tmp_reg); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); - // cas failed here if slow_cass == NULL - if (flag->is_valid()) { - li(flag, 1); - j(counter); - } - - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - bind(cas_success); - if (flag->is_valid()) { - li(flag, 0); - bind(counter); - } - - if (counters != NULL) { - atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), - tmp_reg, t0); - } - } - j(done); - - // don't care flag unless jumping to done - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - { - Label cas_success, nope; - load_prototype_header(tmp_reg, obj_reg); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); - bind(cas_success); - - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. 
- if (counters != NULL) { - atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, - t0); - } - bind(nope); - } - - bind(cas_label); - - return null_check_offset; -} - -void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { - Label retry_load; - bind(retry_load); - // flush and load exclusive from the memory location - lr_w(tmp, counter_addr); - addw(tmp, tmp, 1); - // if we store+flush with no intervening write tmp wil be zero - sc_w(tmp, tmp, counter_addr); - bnez(tmp, retry_load); -} - void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { assert(ReservedCodeCacheSize < 4*G, "branch out of range"); assert(CodeCache::find_blob(entry.target()) != NULL, @@ -2962,15 +2645,15 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label* L_success, Label* L_failure, Label* L_slow_path, Register super_check_offset) { - assert_different_registers(sub_klass, super_klass, temp_reg); + assert_different_registers(sub_klass, super_klass, tmp_reg); bool must_load_sco = (super_check_offset == noreg); if (must_load_sco) { - assert(temp_reg != noreg, "supply either a temp or a register offset"); + assert(tmp_reg != noreg, "supply either a temp or a register offset"); } else { assert_different_registers(sub_klass, super_klass, super_check_offset); } @@ -3002,8 +2685,8 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, // Check the supertype display: if (must_load_sco) { - lwu(temp_reg, super_check_offset_addr); - super_check_offset = temp_reg; + lwu(tmp_reg, super_check_offset_addr); + super_check_offset = tmp_reg; } add(t0, sub_klass, super_check_offset); Address super_check_addr(t0); @@ -3034,15 +2717,15 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, #undef final_jmp } -// scans count pointer sized words 
at [addr] for occurence of value, +// Scans count pointer sized words at [addr] for occurence of value, // generic void MacroAssembler::repne_scan(Register addr, Register value, Register count, - Register temp) { + Register tmp) { Label Lloop, Lexit; beqz(count, Lexit); bind(Lloop); - ld(temp, addr); - beq(value, temp, Lexit); + ld(tmp, addr); + beq(value, tmp, Lexit); add(addr, addr, wordSize); sub(count, count, 1); bnez(count, Lloop); @@ -3051,15 +2734,15 @@ void MacroAssembler::repne_scan(Register addr, Register value, Register count, void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, Register super_klass, - Register temp_reg, - Register temp2_reg, + Register tmp1_reg, + Register tmp2_reg, Label* L_success, Label* L_failure) { - assert_different_registers(sub_klass, super_klass, temp_reg); - if (temp2_reg != noreg) { - assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, t0); + assert_different_registers(sub_klass, super_klass, tmp1_reg); + if (tmp2_reg != noreg) { + assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); } -#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) +#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) Label L_fallthrough; int label_nulls = 0; @@ -3068,7 +2751,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, assert(label_nulls <= 1, "at most one NULL in the batch"); - // a couple of usefule fields in sub_klass: + // A couple of usefule fields in sub_klass: int ss_offset = in_bytes(Klass::secondary_supers_offset()); int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); Address secondary_supers_addr(sub_klass, ss_offset); @@ -3126,7 +2809,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, // pop will restore x10, so we should use a temp register to keep its value mv(t1, x10); - // Unspill the temp. 
registers: + // Unspill the temp registers: pop_reg(pushed_registers, sp); bne(t1, t0, *L_failure); @@ -3159,11 +2842,11 @@ void MacroAssembler::tlab_allocate(Register obj, void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, - Register tmp1, + Register tmp, Label& slow_case, bool is_far) { BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); + bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); } @@ -3188,7 +2871,8 @@ void MacroAssembler::get_thread(Register thread) { } void MacroAssembler::load_byte_map_base(Register reg) { - jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); li(reg, (uint64_t)byte_map_base); } @@ -3219,19 +2903,19 @@ void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &o } void MacroAssembler::build_frame(int framesize) { - assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + assert(framesize >= 2, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); sub(sp, sp, framesize); sd(fp, Address(sp, framesize - 2 * wordSize)); - sd(lr, Address(sp, framesize - wordSize)); - if (PreserveFramePointer) { add(fp, sp, framesize - 2 * wordSize); } + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } } void MacroAssembler::remove_frame(int framesize) { - assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + assert(framesize >= 2, "framesize 
must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ld(fp, Address(sp, framesize - 2 * wordSize)); - ld(lr, Address(sp, framesize - wordSize)); + ld(ra, Address(sp, framesize - wordSize)); add(sp, sp, framesize); } @@ -3242,7 +2926,7 @@ void MacroAssembler::reserved_stack_check() { ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); bltu(sp, t0, no_reserved_zone_enabling); - enter(); // LR and FP are live. + enter(); // RA and FP are live. mv(c_rarg0, xthread); int32_t offset = 0; la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); @@ -3260,52 +2944,267 @@ void MacroAssembler::reserved_stack_check() { bind(no_reserved_zone_enabling); } -// Move the address of the polling page into dest. -void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { - if (SafepointMechanism::uses_thread_local_poll()) { - ld(dest, Address(xthread, Thread::polling_page_offset())); - } else { - unsigned long align = (uintptr_t)page & 0xfff; - assert(align == 0, "polling page must be page aligned"); - la_patchable(dest, Address(page, rtype), offset); - } +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { + Label retry_load; + bind(retry_load); + // flush and load exclusive from the memory location + lr_w(tmp, counter_addr); + addw(tmp, tmp, 1); + // if we store+flush with no intervening write tmp wil be zero + sc_w(tmp, tmp, counter_addr); + bnez(tmp, retry_load); } -// Move the address of the polling page into dest. 
-address MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { - int32_t offset = 0; - get_polling_page(dest, page, offset, rtype); - return read_polling_page(dest, offset, rtype); +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); } -// Read the polling page. The address of the polling page must -// already be in r. -address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { - InstructionMark im(this); - code_section()->relocate(inst_mark(), rtype); - lwu(zr, Address(r, offset)); - return inst_mark(); -} +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters, + Register flag) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert_different_registers(lock_reg, obj_reg, swap_reg); -void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -#ifdef ASSERT - { - ThreadInVMfromUnknown tiv; - assert (UseCompressedOops, "should only be used for compressed oops"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } -#endif - int oop_index = oop_recorder()->find_index(obj); - InstructionMark im(this); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - code_section()->relocate(inst_mark(), rspec); - li32(dst, 0xDEADBEEF); - clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend. 
-} + if (PrintBiasedLockingStatistics && counters == NULL) + counters = BiasedLocking::counters(); -void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld(swap_reg, mark_addr); + } + andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); + li(t0, markOopDesc::biased_lock_pattern); + bne(t0, tmp_reg, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, tmp_reg, xthread); + xorr(tmp_reg, swap_reg, tmp_reg); + andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); + if (flag->is_valid()) { + mv(flag, tmp_reg); + } + if (counters != NULL) { + Label around; + bnez(tmp_reg, around); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); + j(done); + bind(around); + } else { + beqz(tmp_reg, done); + } + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. 
+ + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); + bnez(t0, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); + bnez(t0, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + { + Label cas_success; + Label counter; + mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, t0); + orr(tmp_reg, swap_reg, xthread); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); + // cas failed here if slow_cass == NULL + if (flag->is_valid()) { + mv(flag, 1); + j(counter); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. 
+ bind(cas_success); + if (flag->is_valid()) { + mv(flag, 0); + bind(counter); + } + if (counters != NULL) { + atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), + tmp_reg, t0); + } + } + j(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label cas_success; + Label counter; + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, xthread, tmp_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); + // cas failed here if slow_cass == NULL + if (flag->is_valid()) { + mv(flag, 1); + j(counter); + } + + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + bind(cas_success); + if (flag->is_valid()) { + mv(flag, 0); + bind(counter); + } + if (counters != NULL) { + atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), + tmp_reg, t0); + } + } + j(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. 
Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label cas_success, nope; + load_prototype_header(tmp_reg, obj_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); + bind(cas_success); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (counters != NULL) { + atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, + t0); + } + bind(nope); + } + + bind(cas_label); + + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); + if (flag->is_valid()) { mv(flag, tmp_reg); } + beqz(tmp_reg, done); +} + +// Move the address of the polling page into dest. 
+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(dest, Address(xthread, Thread::polling_page_offset())); + } else { + uint64_t align = (uint64_t)page & 0xfff; + assert(align == 0, "polling page must be page aligned"); + la_patchable(dest, Address(page, rtype), offset); + } +} + +// Read the polling page. The address of the polling page must +// already be in r. +void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { + int32_t offset = 0; + get_polling_page(dest, page, offset, rtype); + read_polling_page(dest, offset, rtype); +} + +// Read the polling page. The address of the polling page must +// already be in r. +void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { + code_section()->relocate(pc(), rtype); + lwu(zr, Address(dest, offset)); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert (UseCompressedOops, "should only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + int oop_index = oop_recorder()->find_index(obj); + InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + code_section()->relocate(inst_mark(), rspec); + li32(dst, 0xDEADBEEF); + zero_extend(dst, dst, 32); +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { assert (UseCompressedClassPointers, "should only be used for compressed headers"); assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int index = oop_recorder()->find_index(k); @@ -3316,7 +3215,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) 
{ code_section()->relocate(inst_mark(), rspec); narrowKlass nk = Klass::encode_klass(k); li32(dst, nk); - clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend. + zero_extend(dst, dst, 32); } // Maybe emit a call via a trampoline. If the code cache is small @@ -3376,7 +3275,7 @@ address MacroAssembler::ic_call(address entry, jint method_index) { // // Related trampoline stub for this call site in the stub section: // load the call target from the constant pool -// branch (LR still points to the call site above) +// branch (RA still points to the call site above) address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, address dest) { @@ -3392,7 +3291,8 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // make sure 4 byte aligned here, so that the destination address would be // 8 byte aligned after 3 intructions - while (offset() % wordSize == 0) { nop(); } + // when we reach here we may get a 2-byte alignment so need to align it + align(wordSize, NativeCallTrampolineStub::data_offset); relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset)); @@ -3405,7 +3305,9 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ld(t0, target); // auipc + ld jr(t0); // jalr bind(target); - assert(offset() % wordSize == 0, "address loaded by ld must be 8-byte aligned under riscv64"); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + assert(offset() % wordSize == 0, "bad alignment"); emit_int64((intptr_t)dest); const address stub_start_addr = addr_at(stub_start_offset); @@ -3452,513 +3354,615 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { beq(src1, t0, equal); } -void MacroAssembler::load_method_holder(Register holder, Register method) { - ld(holder, Address(method, Method::const_offset())); // ConstMethod* - ld(holder, Address(holder, 
ConstMethod::constants_offset())); // ConstantPool* - ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* -} - -void MacroAssembler::oop_beq(Register obj1, Register obj2, Label& L_equal, bool is_far) { - beq(obj1, obj2, L_equal, is_far); +// string indexof +// compute index by trailing zeros +void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, + Register match_mask, Register result, + Register ch2, Register tmp, + bool haystack_isL) +{ + int haystack_chr_shift = haystack_isL ? 0 : 1; + srl(match_mask, match_mask, trailing_zeros); + srli(match_mask, match_mask, 1); + srli(tmp, trailing_zeros, LogBitsPerByte); + if (!haystack_isL) andi(tmp, tmp, 0xE); + add(haystack, haystack, tmp); + ld(ch2, Address(haystack)); + if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); + add(result, result, tmp); } -void MacroAssembler::oop_bne(Register obj1, Register obj2, Label& L_nequal, bool is_far) { - bne(obj1, obj2, L_nequal, is_far); +// string indexof +// Find pattern element in src, compute match mask, +// only the first occurrence of 0x80/0x8000 at low bits is the valid match index +// match mask patterns and corresponding indices would be like: +// - 0x8080808080808080 (Latin1) +// - 7 6 5 4 3 2 1 0 (match index) +// - 0x8000800080008000 (UTF16) +// - 3 2 1 0 (match index) +void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2) +{ + xorr(src, pattern, src); + sub(match_mask, src, mask1); + orr(src, src, mask2); + notr(src, src); + andr(match_mask, match_mask, src); } #ifdef COMPILER2 -// TODO: wind: THIS FUNCTION IS TOTALLY DIFFERENT FROM JDK11 -// generate_large_array_equals() -//  git difftool ~/jvm/dragonwell11/src/hotspot/cpu/riscv64/stubGenerator_riscv64.cpp ~/jvm/dragonwell11-latest/src/hotspot/cpu/riscv64/stubGenerator_riscv64.cpp -void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, - Register tmp4, 
Register tmp5, Register tmp6, Register result, - Register cnt1, int elem_size) { - Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; - Register tmp1 = t0; - Register tmp2 = t1; - Register cnt2 = tmp2; // cnt2 only used in array length compare - Register elem_per_word = tmp6; - int log_elem_size = exact_log2(elem_size); - int length_offset = arrayOopDesc::length_offset_in_bytes(); - int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); +// Code for BigInteger::mulAdd instrinsic +// out = x10 +// in = x11 +// offset = x12 (already out.length-offset) +// len = x13 +// k = x14 +// tmp = x28 +// +// pseudo code from java implementation: +// long kLong = k & LONG_MASK; +// carry = 0; +// offset = out.length-offset - 1; +// for (int j = len - 1; j >= 0; j--) { +// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; +void MacroAssembler::mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp) { + Label L_tail_loop, L_unroll, L_end; + mv(tmp, out); + mv(out, zr); + blez(len, L_end); + zero_extend(k, k, 32); + slliw(t0, offset, LogBytesPerInt); + add(offset, tmp, t0); + slliw(t0, len, LogBytesPerInt); + add(in, in, t0); + + const int unroll = 8; + li(tmp, unroll); + blt(len, tmp, L_tail_loop); + bind(L_unroll); + for (int i = 0; i < unroll; i++) { + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + } + subw(len, len, tmp); + bge(len, tmp, L_unroll); + + bind(L_tail_loop); + blez(len, L_end); + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + 
srli(out, t0, 32); + subw(len, len, 1); + j(L_tail_loop); + + bind(L_end); +} + +// add two unsigned input and output carry +void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + assert_different_registers(dst, src2); + add(dst, src1, src2); + sltu(carry, dst, src2); +} - assert(elem_size == 1 || elem_size == 2, "must be char or byte"); - assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); - li(elem_per_word, wordSize / elem_size); +// add two input with carry +void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + add(dst, src1, src2); + add(dst, dst, carry); +} - BLOCK_COMMENT("arrays_equals {"); +// add two unsigned input with carry and output carry +void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, src2); + adc(dst, src1, src2, carry); + sltu(carry, dst, src2); +} - // if (a1 == a2), return true - oop_beq(a1, a2, SAME); +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry) +{ + cad(dest_lo, dest_lo, src1, carry); + add(dest_hi, dest_hi, carry); + cad(dest_lo, dest_lo, src2, carry); + add(final_dest_hi, dest_hi, carry); +} - mv(result, false); - beqz(a1, DONE); - beqz(a2, DONE); - lwu(cnt1, Address(a1, length_offset)); - lwu(cnt2, Address(a2, length_offset)); - bne(cnt2, cnt1, DONE); - beqz(cnt1, SAME); +/** + * Multiply 32 bit by 32 bit first loop. 
+ */ +void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = y[idx] * x[xstart] + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + + Label L_first_loop, L_first_loop_exit; + blez(idx, L_first_loop_exit); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(x_xstart, Address(t0, 0)); + + bind(L_first_loop); + subw(idx, idx, 1); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(y_idx, Address(t0, 0)); + mul(product, x_xstart, y_idx); + add(product, product, carry); + srli(carry, product, 32); + subw(kdx, kdx, 1); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(product, Address(t0, 0)); + bgtz(idx, L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 64 bit by 64 bit first loop. 
+ */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // - slli(tmp5, cnt1, 3 + log_elem_size); - sub(tmp5, zr, tmp5); - add(a1, a1, base_offset); - add(a2, a2, base_offset); - ld(tmp3, Address(a1, 0)); - ld(tmp4, Address(a2, 0)); - ble(cnt1, elem_per_word, SHORT); // short or same + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; - // Main 16 byte comparison loop with 2 exits - bind(NEXT_DWORD); { - ld(tmp1, Address(a1, wordSize)); - ld(tmp2, Address(a2, wordSize)); - sub(cnt1, cnt1, 2 * wordSize / elem_size); - blez(cnt1, TAIL); - bne(tmp3, tmp4, DONE); - ld(tmp3, Address(a1, 2 * wordSize)); - ld(tmp4, Address(a2, 2 * wordSize)); - add(a1, a1, 2 * wordSize); - add(a2, a2, 2 * wordSize); - ble(cnt1, elem_per_word, TAIL2); - } beq(tmp1, tmp2, NEXT_DWORD); - j(DONE); + subw(xstart, xstart, 1); + bltz(xstart, L_one_x); - bind(TAIL); - xorr(tmp4, tmp3, tmp4); - xorr(tmp2, tmp1, tmp2); - sll(tmp2, tmp2, tmp5); - orr(tmp5, tmp4, tmp2); - j(IS_TMP5_ZR); + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(x_xstart, Address(t0, 0)); + ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian - bind(TAIL2); - bne(tmp1, tmp2, DONE); + bind(L_first_loop); + subw(idx, idx, 1); + bltz(idx, L_first_loop_exit); + subw(idx, idx, 1); + bltz(idx, L_one_y); - bind(SHORT); - xorr(tmp4, tmp3, tmp4); - sll(tmp5, tmp4, tmp5); + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(y_idx, Address(t0, 0)); + ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); - bind(IS_TMP5_ZR); - bnez(tmp5, DONE); + mulhu(t0, 
x_xstart, y_idx); + mul(product, x_xstart, y_idx); + cad(product, product, carry, t1); + adc(carry, t0, zr, t1); - bind(SAME); - mv(result, true); - // That's it. - bind(DONE); + subw(kdx, kdx, 2); + ror_imm(product, product, 32); // back to big-endian + shadd(t0, kdx, z, t0, LogBytesPerInt); + sd(product, Address(t0, 0)); - BLOCK_COMMENT("} array_equals"); -} + j(L_first_loop); -// Compare Strings + bind(L_one_y); + lwu(y_idx, Address(y, 0)); + j(L_multiply); -// For Strings we're passed the address of the first characters in a1 -// and a2 and the length in cnt1. -// elem_size is the element size in bytes: either 1 or 2. -// All comparisons (including the final one, which may overlap) are -// performed 8 bytes at a time. + bind(L_one_x); + lwu(x_xstart, Address(x, 0)); + j(L_first_loop); -void MacroAssembler::string_equals(Register a1, Register a2, - Register result, Register cnt1, int elem_size) + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128 bit. Unrolled inner loop. 
+ * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) { - Label SAME, DONE, SHORT, NEXT_WORD; - Register tmp1 = t0; - Register tmp2 = t1; + // jlong carry, x[], y[], z[]; + // int kdx = xstart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // - assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); - assert_different_registers(a1, a2, result, cnt1, t0, t1); + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; - BLOCK_COMMENT("string_equals {"); + srliw(jdx, idx, 2); - beqz(cnt1, SAME); - mv(result, false); + bind(L_third_loop); - // Check for short strings, i.e. smaller than wordSize. - sub(cnt1, cnt1, wordSize); - blez(cnt1, SHORT); + subw(jdx, jdx, 1); + bltz(jdx, L_third_loop_exit); + subw(idx, idx, 4); - // Main 8 byte comparison loop. - bind(NEXT_WORD); { - ld(tmp1, Address(a1, 0)); - add(a1, a1, wordSize); - ld(tmp2, Address(a2, 0)); - add(a2, a2, wordSize); - sub(cnt1, cnt1, wordSize); - bne(tmp1, tmp2, DONE); - } bgtz(cnt1, NEXT_WORD); + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ld(yz_idx1, Address(t0, wordSize)); - if (!AvoidUnalignedAccesses) { - // Last longword. In the case where length == 4 we compare the - // same longword twice, but that's still faster than another - // conditional branch. 
- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when - // length == 4. - add(tmp1, a1, cnt1); - ld(tmp1, Address(tmp1, 0)); - add(tmp2, a2, cnt1); - ld(tmp2, Address(tmp2, 0)); - bne(tmp1, tmp2, DONE); - j(SAME); - } + shadd(tmp6, idx, z, t0, LogBytesPerInt); - bind(SHORT); - ld(tmp1, Address(a1)); - ld(tmp2, Address(a2)); - xorr(tmp1, tmp1, tmp2); - neg(cnt1, cnt1); - slli(cnt1, cnt1, LogBitsPerByte); - sll(tmp1, tmp1, cnt1); - bnez(tmp1, DONE); + ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror_imm(yz_idx2, yz_idx2, 32); - // Arrays are equal. - bind(SAME); - mv(result, true); + ld(t1, Address(tmp6, 0)); + ld(t0, Address(tmp6, wordSize)); - // That's it. - bind(DONE); - BLOCK_COMMENT("} string_equals"); -} + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); -typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian + ror_imm(t1, t1, 32, tmp); -// Compare strings. 
-void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, - Register tmp3, int ae) -{ - Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, - DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, - SHORT_LOOP_START, TAIL_CHECK, L; + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + mulhu(carry2, product_hi, yz_idx2); - const int STUB_THRESHOLD = 64 + 8; - bool isLL = ae == StrIntrinsicNode::LL; - bool isLU = ae == StrIntrinsicNode::LU; - bool isUL = ae == StrIntrinsicNode::UL; + cad(tmp3, tmp3, carry, carry); + adc(tmp4, tmp4, zr, carry); + cad(tmp3, tmp3, t0, t0); + cadc(tmp4, tmp4, tmp, t0); + adc(carry, carry2, zr, t0); + cad(tmp4, tmp4, t1, carry2); + adc(carry, carry, zr, carry2); - bool str1_isL = isLL || isLU; - bool str2_isL = isLL || isUL; + ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian + ror_imm(tmp4, tmp4, 32); + sd(tmp4, Address(tmp6, 0)); + sd(tmp3, Address(tmp6, wordSize)); - // for L strings, 1 byte for 1 character - // for U strings, 2 bytes for 1 character - int str1_chr_size = str1_isL ? 1 : 2; - int str2_chr_size = str2_isL ? 1 : 2; - int minCharsInWord = isLL ? wordSize : wordSize / 2; + j(L_third_loop); - load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; - load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + bind(L_third_loop_exit); - BLOCK_COMMENT("string_compare {"); + andi(idx, idx, 0x3); + beqz(idx, L_post_third_loop_done); - // Bizzarely, the counts are passed in bytes, regardless of whether they - // are L or U strings, however the result is always in characters. 
- if (!str1_isL) { - sraiw(cnt1, cnt1, 1); - } - if (!str2_isL) { - sraiw(cnt2, cnt2, 1); + Label L_check_1; + subw(idx, idx, 2); + bltz(idx, L_check_1); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx1, Address(t0, 0)); + ror_imm(yz_idx1, yz_idx1, 32); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); + + shadd(t0, idx, z, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ror_imm(yz_idx2, yz_idx2, 32, tmp); + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + + ror_imm(tmp3, tmp3, 32, tmp); + sd(tmp3, Address(t0, 0)); + + bind(L_check_1); + + andi(idx, idx, 0x1); + subw(idx, idx, 1); + bltz(idx, L_post_third_loop_done); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + mulhu(carry2, tmp4, product_hi); + + shadd(t0, idx, z, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + + add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + + shadd(t0, idx, z, t0, LogBytesPerInt); + sw(tmp3, Address(t0, 0)); + + slli(t0, carry2, 32); + srli(carry, tmp3, 32); + orr(carry, carry, t0); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() intrinsic. 
+ * + * x10: x + * x11: xlen + * x12: y + * x13: ylen + * x14: z + * x15: zlen + * x16: tmp1 + * x17: tmp2 + * x7: tmp3 + * x28: tmp4 + * x29: tmp5 + * x30: tmp6 + * x31: tmp7 + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) +{ + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + mv(idx, ylen); // idx = ylen; + mv(kdx, zlen); // kdx = xlen+ylen; + mv(carry, zr); // carry = 0; + + Label L_multiply_64_x_64_loop, L_done; + + subw(xstart, xlen, 1); + bltz(xstart, L_done); + + const Register jdx = tmp1; + + if (AvoidUnalignedAccesses) { + // Check if x and y are both 8-byte aligned. 
+ orr(t0, xlen, ylen); + andi(t0, t0, 0x1); + beqz(t0, L_multiply_64_x_64_loop); + + multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + Label L_second_loop_unaligned; + bind(L_second_loop_unaligned); + mv(carry, zr); + mv(jdx, ylen); + subw(xstart, xstart, 1); + bltz(xstart, L_done); + sub(sp, sp, 2 * wordSize); + sd(z, Address(sp, 0)); + sd(zr, Address(sp, wordSize)); + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(product, Address(t0, 0)); + Label L_third_loop, L_third_loop_exit; + + blez(jdx, L_third_loop_exit); + + bind(L_third_loop); + subw(jdx, jdx, 1); + shadd(t0, jdx, y, t0, LogBytesPerInt); + lwu(t0, Address(t0, 0)); + mul(t1, t0, product); + add(t0, t1, carry); + shadd(tmp6, jdx, z, t1, LogBytesPerInt); + lwu(t1, Address(tmp6, 0)); + add(t0, t0, t1); + sw(t0, Address(tmp6, 0)); + srli(carry, t0, 32); + bgtz(jdx, L_third_loop); + + bind(L_third_loop_exit); + ld(z, Address(sp, 0)); + addi(sp, sp, 2 * wordSize); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + j(L_second_loop_unaligned); + } + + bind(L_multiply_64_x_64_loop); + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop_aligned; + beqz(kdx, L_second_loop_aligned); + + Label L_carry; + subw(kdx, kdx, 1); + beqz(kdx, L_carry); + + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + srli(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + // Second and third (nested) loops. 
+ // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + bind(L_second_loop_aligned); + mv(carry, zr); // carry = 0; + mv(jdx, ylen); // j = ystart+1 + + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_done); + + sub(sp, sp, 4 * wordSize); + sd(z, Address(sp, 0)); + + Label L_last_x; + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_last_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(product_hi, Address(t0, 0)); + ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + sd(ylen, Address(sp, wordSize)); + sd(x, Address(sp, 2 * wordSize)); + sd(xstart, Address(sp, 3 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ld(z, Address(sp, 0)); + ld(ylen, Address(sp, wordSize)); + ld(x, Address(sp, 2 * wordSize)); + ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen + addi(sp, sp, 4 * wordSize); + + addiw(tmp3, xlen, 1); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + subw(tmp3, tmp3, 1); + bltz(tmp3, L_done); + + srli(carry, carry, 32); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + j(L_second_loop_aligned); + + // Next infrequent code is moved outside loops. + bind(L_last_x); + lwu(product_hi, Address(x, 0)); + j(L_third_loop_prologue); + + bind(L_done); +} +#endif + +// Count bits of trailing zero chars from lsb to msb until first non-zero element. 
+// For LL case, one byte for one element, so shift 8 bits once, and for other case, +// shift 16 bits once. +void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) +{ + if (UseRVB) { + assert_different_registers(Rd, Rs, tmp1); + int step = isLL ? 8 : 16; + ctz(Rd, Rs); + andi(tmp1, Rd, step - 1); + sub(Rd, Rd, tmp1); + return; } + assert_different_registers(Rd, Rs, tmp1, tmp2); + Label Loop; + int step = isLL ? 8 : 16; + li(Rd, -step); + mv(tmp2, Rs); - // Compute the minimum of the string lengths and save the difference in result. - sub(result, cnt1, cnt2); - bgt(cnt1, cnt2, L); - mv(cnt2, cnt1); - bind(L); + bind(Loop); + addi(Rd, Rd, step); + andi(tmp1, tmp2, ((1 << step) - 1)); + srli(tmp2, tmp2, step); + beqz(tmp1, Loop); +} - // A very short string - li(t0, minCharsInWord); - ble(cnt2, t0, SHORT_STRING); +// This instruction reads adjacent 4 bytes from the lower half of source register, +// inflate into a register, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A300A200A100A0 +void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) + { + andr(tmp2, Rs, tmp1); + if (i) { + slli(tmp2, tmp2, i * 8); + } + orr(Rd, Rd, tmp2); + if (i != 3) { + slli(tmp1, tmp1, 8); + } + } +} - // Compare longwords - // load first parts of strings and finish initialization while loading +// This instruction reads adjacent 4 bytes from the upper half of source register, +// inflate into a register, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A700A600A500A4 +void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF00000000); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) { - if (str1_isL == str2_isL) { // LL or UU - // load 8 bytes once to compare - ld(tmp1, Address(str1)); - beq(str1, str2, 
DONE); - ld(tmp2, Address(str2)); - li(t0, STUB_THRESHOLD); - bge(cnt2, t0, STUB); - sub(cnt2, cnt2, minCharsInWord); - beqz(cnt2, TAIL_CHECK); - // convert cnt2 from characters to bytes - if(!str1_isL) { - slli(cnt2, cnt2, 1); - } - add(str2, str2, cnt2); - add(str1, str1, cnt2); - sub(cnt2, zr, cnt2); - } else if (isLU) { // LU case - lwu(tmp1, Address(str1)); - ld(tmp2, Address(str2)); - li(t0, STUB_THRESHOLD); - bge(cnt2, t0, STUB); - addi(cnt2, cnt2, -4); - add(str1, str1, cnt2); - sub(cnt1, zr, cnt2); - slli(cnt2, cnt2, 1); - add(str2, str2, cnt2); - inflate_lo32(tmp3, tmp1); - mv(tmp1, tmp3); - sub(cnt2, zr, cnt2); - addi(cnt1, cnt1, 4); - } else { // UL case - ld(tmp1, Address(str1)); - lwu(tmp2, Address(str2)); - li(t0, STUB_THRESHOLD); - bge(cnt2, t0, STUB); - addi(cnt2, cnt2, -4); - slli(t0, cnt2, 1); - sub(cnt1, zr, t0); - add(str1, str1, t0); - add(str2, str2, cnt2); - inflate_lo32(tmp3, tmp2); - mv(tmp2, tmp3); - sub(cnt2, zr, cnt2); - addi(cnt1, cnt1, 8); + andr(tmp2, Rs, tmp1); + orr(Rd, Rd, tmp2); + srli(Rd, Rd, 8); + if (i != 3) { + slli(tmp1, tmp1, 8); } - addi(cnt2, cnt2, isUL ? 
4 : 8); - bgez(cnt2, TAIL); - xorr(tmp3, tmp1, tmp2); - bnez(tmp3, DIFFERENCE); - - // main loop - bind(NEXT_WORD); - if (str1_isL == str2_isL) { // LL or UU - add(t0, str1, cnt2); - ld(tmp1, Address(t0)); - add(t0, str2, cnt2); - ld(tmp2, Address(t0)); - addi(cnt2, cnt2, 8); - } else if (isLU) { // LU case - add(t0, str1, cnt1); - lwu(tmp1, Address(t0)); - add(t0, str2, cnt2); - ld(tmp2, Address(t0)); - addi(cnt1, cnt1, 4); - inflate_lo32(tmp3, tmp1); - mv(tmp1, tmp3); - addi(cnt2, cnt2, 8); - } else { // UL case - add(t0, str2, cnt2); - lwu(tmp2, Address(t0)); - add(t0, str1, cnt1); - ld(tmp1, Address(t0)); - inflate_lo32(tmp3, tmp2); - mv(tmp2, tmp3); - addi(cnt1, cnt1, 8); - addi(cnt2, cnt2, 4); - } - bgez(cnt2, TAIL); - - xorr(tmp3, tmp1, tmp2); - beqz(tmp3, NEXT_WORD); - j(DIFFERENCE); - bind(TAIL); - xorr(tmp3, tmp1, tmp2); - bnez(tmp3, DIFFERENCE); - // Last longword. In the case where length == 4 we compare the - // same longword twice, but that's still faster than another - // conditional branch. - if (str1_isL == str2_isL) { // LL or UU - ld(tmp1, Address(str1)); - ld(tmp2, Address(str2)); - } else if (isLU) { // LU case - lwu(tmp1, Address(str1)); - ld(tmp2, Address(str2)); - inflate_lo32(tmp3, tmp1); - mv(tmp1, tmp3); - } else { // UL case - lwu(tmp2, Address(str2)); - ld(tmp1, Address(str1)); - inflate_lo32(tmp3, tmp2); - mv(tmp2, tmp3); - } - bind(TAIL_CHECK); - xorr(tmp3, tmp1, tmp2); - beqz(tmp3, DONE); - - // Find the first different characters in the longwords and - // compute their difference. 
- bind(DIFFERENCE); - ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb - srl(tmp1, tmp1, result); - srl(tmp2, tmp2, result); - if (isLL) { - andi(tmp1, tmp1, 0xFF); - andi(tmp2, tmp2, 0xFF); - } else { - andi(tmp1, tmp1, 0xFFFF); - andi(tmp2, tmp2, 0xFFFF); - } - sub(result, tmp1, tmp2); - j(DONE); - } - - bind(STUB); - RuntimeAddress stub = NULL; - switch (ae) { - case StrIntrinsicNode::LL: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_LL()); - break; - case StrIntrinsicNode::UU: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_UU()); - break; - case StrIntrinsicNode::LU: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_LU()); - break; - case StrIntrinsicNode::UL: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_UL()); - break; - default: - ShouldNotReachHere(); - } - assert(stub.target() != NULL, "compare_long_string stub has not been generated"); - trampoline_call(stub); - j(DONE); - - bind(SHORT_STRING); - // Is the minimum length zero? 
- beqz(cnt2, DONE); - // arrange code to do most branches while loading and loading next characters - // while comparing previous - (this->*str1_load_chr)(tmp1, Address(str1), t0); - addi(str1, str1, str1_chr_size); - addi(cnt2, cnt2, -1); - beqz(cnt2, SHORT_LAST_INIT); - (this->*str2_load_chr)(cnt1, Address(str2), t0); - addi(str2, str2, str2_chr_size); - j(SHORT_LOOP_START); - bind(SHORT_LOOP); - addi(cnt2, cnt2, -1); - beqz(cnt2, SHORT_LAST); - bind(SHORT_LOOP_START); - (this->*str1_load_chr)(tmp2, Address(str1), t0); - addi(str1, str1, str1_chr_size); - (this->*str2_load_chr)(t0, Address(str2), t0); - addi(str2, str2, str2_chr_size); - bne(tmp1, cnt1, SHORT_LOOP_TAIL); - addi(cnt2, cnt2, -1); - beqz(cnt2, SHORT_LAST2); - (this->*str1_load_chr)(tmp1, Address(str1), t0); - addi(str1, str1, str1_chr_size); - (this->*str2_load_chr)(cnt1, Address(str2), t0); - addi(str2, str2, str2_chr_size); - beq(tmp2, t0, SHORT_LOOP); - sub(result, tmp2, t0); - j(DONE); - bind(SHORT_LOOP_TAIL); - sub(result, tmp1, cnt1); - j(DONE); - bind(SHORT_LAST2); - beq(tmp2, t0, DONE); - sub(result, tmp2, t0); - - j(DONE); - bind(SHORT_LAST_INIT); - (this->*str2_load_chr)(cnt1, Address(str2), t0); - addi(str2, str2, str2_chr_size); - bind(SHORT_LAST); - beq(tmp1, cnt1, DONE); - sub(result, tmp1, cnt1); - - bind(DONE); - - BLOCK_COMMENT("} string_compare"); -} -#endif // COMPILER2 - -// string indexof -// compute index by trailing zeros -void MacroAssembler::compute_index(Register haystack, Register trailing_zero, - Register match_mask, Register result, - Register ch2, Register tmp, - bool haystack_isL) -{ - int haystack_chr_shift = haystack_isL ? 
0 : 1; - srl(match_mask, match_mask, trailing_zero); - srli(match_mask, match_mask, 1); - srli(tmp, trailing_zero, LogBitsPerByte); - if (!haystack_isL) andi(tmp, tmp, 0xE); - add(haystack, haystack, tmp); - ld(ch2, Address(haystack)); - if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); - add(result, result, tmp); -} - -// string indexof -// find pattern element in src, compute match mask, -// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -// match mask patterns and corresponding indices would be like: -// - 0x8080808080808080 (Latin1) -// - 7 6 5 4 3 2 1 0 (match index) -// - 0x8000800080008000 (UTF16) -// - 3 2 1 0 (match index) -void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, - Register mask1, Register mask2) -{ - xorr(src, pattern, src); - sub(match_mask, src, mask1); - orr(src, src, mask2); - notr(src, src); - andr(match_mask, match_mask, src); -} - -// count bits of trailing zero chars from lsb to msb until first non-zero element. -// For LL case, one byte for one element, so shift 8 bits once, and for other case, -// shift 16 bits once. -void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register Rtmp1, Register Rtmp2) -{ - assert_different_registers(Rd, Rs, Rtmp1, Rtmp2); - Label Loop; - int step = isLL ? 
8 : 16; - li(Rd, -step); - mv(Rtmp2, Rs); - - bind(Loop); - addi(Rd, Rd, step); - andi(Rtmp1, Rtmp2, ((1 << step) - 1)); - srli(Rtmp2, Rtmp2, step); - beqz(Rtmp1, Loop); -} - -// This instruction reads adjacent 4 bytes from the lower half of source register, -// inflate into a register, for example: -// Rs: A7A6A5A4A3A2A1A0 -// Rd: 00A300A200A100A0 -void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) -{ - assert_different_registers(Rd, Rs, Rtmp1, Rtmp2); - li(Rtmp1, 0xFF); - mv(Rd, zr); - for (int i = 0; i <= 3; i++) - { - andr(Rtmp2, Rs, Rtmp1); - if (i) { - slli(Rtmp2, Rtmp2, i * 8); - } - orr(Rd, Rd, Rtmp2); - if (i != 3) { - slli(Rtmp1, Rtmp1, 8); - } - } -} - -// This instruction reads adjacent 4 bytes from the upper half of source register, -// inflate into a register, for example: -// Rs: A7A6A5A4A3A2A1A0 -// Rd: 00A700A600A500A4 -void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) -{ - assert_different_registers(Rd, Rs, Rtmp1, Rtmp2); - li(Rtmp1, 0xFF00000000); - mv(Rd, zr); - for (int i = 0; i <= 3; i++) - { - andr(Rtmp2, Rs, Rtmp1); - orr(Rd, Rd, Rtmp2); - srli(Rd, Rd, 8); - if (i != 3) { - slli(Rtmp1, Rtmp1, 8); - } - } -} + } +} // The size of the blocks erased by the zero_blocks stub. We must // handle anything smaller than this ourselves in zero_words(). 
@@ -3984,9 +3988,9 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) Label around, done, done16; bltu(cnt, t0, around); { - RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv64::zero_blocks()); + RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); - if (StubRoutines::riscv64::complete()) { + if (StubRoutines::riscv::complete()) { address tpc = trampoline_call(zero_blocks); if (tpc == NULL) { DEBUG_ONLY(reset_labels1(around)); @@ -4020,9 +4024,10 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) return pc(); } -// base: Address of a buffer to be zeroed, 8 bytes aligned. -// cnt: Immediate count in HeapWords. #define SmallArraySize (18 * BytesPerLong) + +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Immediate count in HeapWords. void MacroAssembler::zero_words(Register base, u_int64_t cnt) { assert_different_registers(base, t0, t1); @@ -4035,7 +4040,7 @@ void MacroAssembler::zero_words(Register base, u_int64_t cnt) } } else { const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll - int remainder = cnt % unroll; + int remainder = cnt % unroll; for (int i = 0; i < remainder; i++) { sd(zr, Address(base, i * wordSize)); } @@ -4054,6 +4059,7 @@ void MacroAssembler::zero_words(Register base, u_int64_t cnt) add(loop_base, loop_base, unroll * wordSize); bnez(cnt_reg, loop); } + BLOCK_COMMENT("} zero_words"); } @@ -4094,8 +4100,8 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) andi(t0, cnt, unroll - 1); sub(cnt, cnt, t0); - slli(t1, t0, 3); - add(base, base, t1); // align 8, so first sd n % 8 = mod, next loop sd 8 * n. + // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
+ shadd(base, t0, base, t1, 3); la(t1, entry); slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) sub(t1, t1, t0); @@ -4114,15 +4120,15 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) } #define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register temp) { \ +void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ Label L_Okay; \ fscsr(zr); \ FLOATCVT(dst, src); \ - frcsr(temp); \ - andi(temp, temp, 0x1E); \ - beqz(temp, L_Okay); \ - FLOATEQ(temp, src, src); \ - bnez(temp, L_Okay); \ + frcsr(tmp); \ + andi(tmp, tmp, 0x1E); \ + beqz(tmp, L_Okay); \ + FLOATEQ(tmp, src, src); \ + bnez(tmp, L_Okay); \ mv(dst, zr); \ bind(L_Okay); \ } @@ -4172,8 +4178,8 @@ FCMP(double, d); // Zero words; len is in bytes // Destroys all registers except addr // len must be a nonzero multiple of wordSize -void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { - assert_different_registers(addr, len, tmp1, t0, t1); +void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { + assert_different_registers(addr, len, tmp, t0, t1); #ifdef ASSERT { @@ -4218,9 +4224,8 @@ void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { srli(len, len, LogBytesPerWord); andi(t0, len, unroll - 1); // t0 = cnt % unroll sub(len, len, t0); // cnt -= unroll - // tmp1 always points to the end of the region we're about to zero - slli(t1, t0, LogBytesPerWord); - add(tmp1, addr, t1); + // tmp always points to the end of the region we're about to zero + shadd(tmp, t0, addr, t1, LogBytesPerWord); la(t1, entry); slli(t0, t0, 2); sub(t1, t1, t0); @@ -4228,16 +4233,29 @@ void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { bind(loop); sub(len, len, unroll); for (int i = -unroll; i < 0; i++) { - Assembler::sd(zr, Address(tmp1, i * wordSize)); + Assembler::sd(zr, 
Address(tmp, i * wordSize)); } bind(entry); - add(tmp1, tmp1, unroll * wordSize); + add(tmp, tmp, unroll * wordSize); bnez(len, loop); } // shift left by shamt and add // Rd = (Rs1 << shamt) + Rs2 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { + if (UseRVB) { + if (shamt == 1) { + sh1add(Rd, Rs1, Rs2); + return; + } else if (shamt == 2) { + sh2add(Rd, Rs1, Rs2); + return; + } else if (shamt == 3) { + sh3add(Rd, Rs1, Rs2); + return; + } + } + if (shamt != 0) { slli(tmp, Rs1, shamt); add(Rd, Rs2, tmp); @@ -4246,14 +4264,42 @@ void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp } } -void MacroAssembler::zero_ext(Register dst, Register src, int clear_bits) { - slli(dst, src, clear_bits); - srli(dst, dst, clear_bits); +void MacroAssembler::zero_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 16) { + zext_h(dst, src); + return; + } else if (bits == 32) { + zext_w(dst, src); + return; + } + } + + if (bits == 8) { + zext_b(dst, src); + } else { + slli(dst, src, XLEN - bits); + srli(dst, dst, XLEN - bits); + } } -void MacroAssembler::sign_ext(Register dst, Register src, int clear_bits) { - slli(dst, src, clear_bits); - srai(dst, dst, clear_bits); +void MacroAssembler::sign_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 8) { + sext_b(dst, src); + return; + } else if (bits == 16) { + sext_h(dst, src); + return; + } + } + + if (bits == 32) { + sext_w(dst, src); + } else { + slli(dst, src, XLEN - bits); + srai(dst, dst, XLEN - bits); + } } void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) @@ -4284,36 +4330,238 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe bind(done); } -#ifdef COMPILER2 -typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); +void MacroAssembler::safepoint_ifence() { + ifence(); +} -// Search for needle in 
haystack and return index or -1 -// x10: result -// x11: haystack -// x12: haystack_len -// x13: needle -// x14: needle_len -void MacroAssembler::string_indexof(Register haystack, Register needle, - Register haystack_len, Register needle_len, - Register tmp1, Register tmp2, - Register tmp3, Register tmp4, - Register tmp5, Register tmp6, - Register result, int ae) +#ifdef COMPILER2 +// short string +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL) { - assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + Register ch1 = t0; + Register index = t1; - Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + BLOCK_COMMENT("string_indexof_char_short {"); - Register ch1 = t0; - Register ch2 = t1; - Register nlen_tmp = tmp1; // needle len tmp - Register hlen_tmp = tmp2; // haystack len tmp - Register result_tmp = tmp4; + Label LOOP, LOOP1, LOOP4, LOOP8; + Label MATCH, MATCH1, MATCH2, MATCH3, + MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; - bool isLL = ae == StrIntrinsicNode::LL; + mv(result, -1); + mv(index, zr); + + bind(LOOP); + addi(t0, index, 8); + ble(t0, cnt1, LOOP8); + addi(t0, index, 4); + ble(t0, cnt1, LOOP4); + j(LOOP1); + + bind(LOOP8); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); + beq(ch, ch1, MATCH4); + isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); + beq(ch, ch1, MATCH5); + isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); + beq(ch, ch1, MATCH6); + isL ? 
lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); + beq(ch, ch1, MATCH7); + addi(index, index, 8); + addi(str1, str1, isL ? 8 : 16); + blt(index, cnt1, LOOP); + j(NOMATCH); - bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; - bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; + bind(LOOP4); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + addi(index, index, 4); + addi(str1, str1, isL ? 4 : 8); + bge(index, cnt1, NOMATCH); + + bind(LOOP1); + isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); + beq(ch, ch1, MATCH); + addi(index, index, 1); + addi(str1, str1, isL ? 1 : 2); + blt(index, cnt1, LOOP1); + j(NOMATCH); + + bind(MATCH1); + addi(index, index, 1); + j(MATCH); + + bind(MATCH2); + addi(index, index, 2); + j(MATCH); + + bind(MATCH3); + addi(index, index, 3); + j(MATCH); + + bind(MATCH4); + addi(index, index, 4); + j(MATCH); + + bind(MATCH5); + addi(index, index, 5); + j(MATCH); + + bind(MATCH6); + addi(index, index, 6); + j(MATCH); + + bind(MATCH7); + addi(index, index, 7); + + bind(MATCH); + mv(result, index); + bind(NOMATCH); + BLOCK_COMMENT("} string_indexof_char_short"); +} + +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void MacroAssembler::string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + bool isL) +{ + Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; + Register ch1 = t0; + Register orig_cnt = t1; + Register mask1 = tmp3; + Register mask2 = tmp2; + Register match_mask = tmp1; + Register trailing_char = tmp4; + Register unaligned_elems = tmp4; + + BLOCK_COMMENT("string_indexof_char 
{"); + beqz(cnt1, NOMATCH); + + addi(t0, cnt1, isL ? -32 : -16); + bgtz(t0, DO_LONG); + string_indexof_char_short(str1, cnt1, ch, result, isL); + j(DONE); + + bind(DO_LONG); + mv(orig_cnt, cnt1); + if (AvoidUnalignedAccesses) { + Label ALIGNED; + andi(unaligned_elems, str1, 0x7); + beqz(unaligned_elems, ALIGNED); + sub(unaligned_elems, unaligned_elems, 8); + neg(unaligned_elems, unaligned_elems); + if (!isL) { + srli(unaligned_elems, unaligned_elems, 1); + } + // do unaligned part per element + string_indexof_char_short(str1, unaligned_elems, ch, result, isL); + bgez(result, DONE); + mv(orig_cnt, cnt1); + sub(cnt1, cnt1, unaligned_elems); + bind(ALIGNED); + } + + // duplicate ch + if (isL) { + slli(ch1, ch, 8); + orr(ch, ch1, ch); + } + slli(ch1, ch, 16); + orr(ch, ch1, ch); + slli(ch1, ch, 32); + orr(ch, ch1, ch); + + if (!isL) { + slli(cnt1, cnt1, 1); + } + + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + mv(mask1, isL ? mask0101 : mask0001); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + mv(mask2, isL ? 
mask7f7f : mask7fff); + + bind(CH1_LOOP); + ld(ch1, Address(str1)); + addi(str1, str1, 8); + addi(cnt1, cnt1, -8); + compute_match_mask(ch1, ch, match_mask, mask1, mask2); + bnez(match_mask, HIT); + bgtz(cnt1, CH1_LOOP); + j(NOMATCH); + + bind(HIT); + ctzc_bit(trailing_char, match_mask, isL, ch1, result); + srli(trailing_char, trailing_char, 3); + addi(cnt1, cnt1, 8); + ble(cnt1, trailing_char, NOMATCH); + // match case + if (!isL) { + srli(cnt1, cnt1, 1); + srli(trailing_char, trailing_char, 1); + } + + sub(result, orig_cnt, cnt1); + add(result, result, trailing_char); + j(DONE); + + bind(NOMATCH); + mv(result, -1); + + bind(DONE); + BLOCK_COMMENT("} string_indexof_char"); +} + +typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + +// Search for needle in haystack and return index or -1 +// x10: result +// x11: haystack +// x12: haystack_len +// x13: needle +// x14: needle_len +void MacroAssembler::string_indexof(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, int ae) +{ + assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + + Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + + Register ch1 = t0; + Register ch2 = t1; + Register nlen_tmp = tmp1; // needle len tmp + Register hlen_tmp = tmp2; // haystack len tmp + Register result_tmp = tmp4; + + bool isLL = ae == StrIntrinsicNode::LL; + + bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; + bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; int needle_chr_shift = needle_isL ? 0 : 1; int haystack_chr_shift = haystack_isL ? 0 : 1; int needle_chr_size = needle_isL ? 
1 : 2; @@ -4459,8 +4707,8 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern Register orig_haystack = tmp5; mv(orig_haystack, haystack); - slli(haystack_end, result_tmp, haystack_chr_shift); // result_tmp = tmp4 - add(haystack_end, haystack, haystack_end); + // result_tmp = tmp4 + shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 mv(tmp3, needle); @@ -4489,8 +4737,8 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, sub(ch2, ch2, 1); // for next pattern element, skip distance -1 bgtz(ch2, BCLOOP); - slli(tmp6, needle_len, needle_chr_shift); - add(tmp6, tmp6, needle); // tmp6: pattern end, address after needle + // tmp6: pattern end, address after needle + shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); if (needle_isL == haystack_isL) { // load last 8 bytes (8LL/4UU symbols) ld(tmp6, Address(tmp6, -wordSize)); @@ -4500,10 +4748,10 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d // We'll have to wait until load completed, but it's still faster than per-character loads+checks srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a - slli(ch2, tmp6, registerSize - 24); - srli(ch2, ch2, registerSize - 8); // pattern[m-2], 0x0000000b - slli(ch1, tmp6, registerSize - 16); - srli(ch1, ch1, registerSize - 8); // pattern[m-3], 0x0000000c + slli(ch2, tmp6, XLEN - 24); + srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b + slli(ch1, tmp6, XLEN - 16); + srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d slli(ch2, ch2, 16); orr(ch2, ch2, ch1); // 0x00000b0c @@ -4521,8 +4769,7 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, // move j with bad char offset table bind(BMLOOPSTR2); // compare pattern to source string backward - slli(result, nlen_tmp, haystack_chr_shift); - add(result, haystack, result); + shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); (this->*haystack_load_1chr)(skipch, Address(result), noreg); sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 if (needle_isL == haystack_isL) { @@ -4546,11 +4793,9 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, } bind(BMLOOPSTR1); - slli(ch1, nlen_tmp, needle_chr_shift); - add(ch1, ch1, needle); + shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); (this->*needle_load_1chr)(ch1, Address(ch1), noreg); - slli(ch2, nlen_tmp, haystack_chr_shift); - add(ch2, haystack, ch2); + shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); bind(BMLOOPSTR1_AFTER_LOAD); @@ -4577,8 +4822,8 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, bind(BMADV); sub(nlen_tmp, needle_len, 1); - slli(result, result_tmp, haystack_chr_shift); - add(haystack, haystack, result); // move haystack after bad char skip offset + // 
move haystack after bad char skip offset + shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ble(haystack, haystack_end, BMLOOPSTR2); add(sp, sp, ASIZE); j(NOMATCH); @@ -4600,13 +4845,13 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, mv(result, zr); RuntimeAddress stub = NULL; if (isLL) { - stub = RuntimeAddress(StubRoutines::riscv64::string_indexof_linear_ll()); + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); } else if (needle_isL) { - stub = RuntimeAddress(StubRoutines::riscv64::string_indexof_linear_ul()); + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); } else { - stub = RuntimeAddress(StubRoutines::riscv64::string_indexof_linear_uu()); + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); } trampoline_call(stub); @@ -4662,162 +4907,675 @@ void MacroAssembler::string_indexof_linearscan(Register haystack, Register needl load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; - Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + + Register first = tmp3; + + if (needle_con_cnt == -1) { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + + sub(t0, needle_len, needle_isL == haystack_isL ? 
4 : 2); + bltz(t0, DOSHORT); + + (this->*needle_load_1chr)(first, Address(needle), noreg); + slli(t0, needle_len, needle_chr_shift); + add(needle, needle, t0); + neg(nlen_neg, t0); + slli(t0, result_tmp, haystack_chr_shift); + add(haystack, haystack, t0); + neg(hlen_neg, t0); + + bind(FIRST_LOOP); + add(t0, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(t0), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(nlen_tmp, nlen_neg, needle_chr_size); + add(hlen_tmp, hlen_neg, haystack_chr_size); + bgez(nlen_tmp, MATCH); + + bind(STR1_NEXT); + add(ch1, needle, nlen_tmp); + (this->*needle_load_1chr)(ch1, Address(ch1), noreg); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + add(nlen_tmp, nlen_tmp, needle_chr_size); + add(hlen_tmp, hlen_tmp, haystack_chr_size); + bltz(nlen_tmp, STR1_NEXT); + j(MATCH); + + bind(DOSHORT); + if (needle_isL == haystack_isL) { + sub(t0, needle_len, 2); + bltz(t0, DO1); + bgtz(t0, DO3); + } + } + + if (needle_con_cnt == 4) { + Label CH1_LOOP; + (this->*load_4chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 4); + slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_4chr)(ch2, Address(ch2), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { + Label CH1_LOOP; + BLOCK_COMMENT("string_indexof DO2 {"); + bind(DO2); + (this->*load_2chr)(ch1, Address(needle), noreg); + if (needle_con_cnt == 2) { + sub(result_tmp, haystack_len, 2); + } + slli(tmp3, result_tmp, haystack_chr_shift); + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + 
bind(CH1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + BLOCK_COMMENT("} string_indexof DO2"); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + BLOCK_COMMENT("string_indexof DO3 {"); + + bind(DO3); + (this->*load_2chr)(first, Address(needle), noreg); + (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); + if (needle_con_cnt == 3) { + sub(result_tmp, haystack_len, 3); + } + slli(hlen_tmp, result_tmp, haystack_chr_shift); + add(haystack, haystack, hlen_tmp); + neg(hlen_neg, hlen_tmp); + + bind(FIRST_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(ch2), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + j(MATCH); + BLOCK_COMMENT("} string_indexof DO3"); + } + + if (needle_con_cnt == -1 || needle_con_cnt == 1) { + Label DO1_LOOP; + + BLOCK_COMMENT("string_indexof DO1 {"); + bind(DO1); + (this->*needle_load_1chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 1); + mv(tmp3, result_tmp); + if (haystack_chr_shift) { + slli(tmp3, result_tmp, haystack_chr_shift); + } + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(DO1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, DO1_LOOP); + BLOCK_COMMENT("} string_indexof DO1"); + } + + bind(NOMATCH); + mv(result, -1); + j(DONE); + + bind(MATCH); + srai(t0, hlen_neg, haystack_chr_shift); + add(result, 
result_tmp, t0); + + bind(DONE); +} + +// Compare strings. +void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, + Register tmp3, int ae) +{ + Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, + DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, + SHORT_LOOP_START, TAIL_CHECK, L; + + const int STUB_THRESHOLD = 64 + 8; + bool isLL = ae == StrIntrinsicNode::LL; + bool isLU = ae == StrIntrinsicNode::LU; + bool isUL = ae == StrIntrinsicNode::UL; + + bool str1_isL = isLL || isLU; + bool str2_isL = isLL || isUL; + + // for L strings, 1 byte for 1 character + // for U strings, 2 bytes for 1 character + int str1_chr_size = str1_isL ? 1 : 2; + int str2_chr_size = str2_isL ? 1 : 2; + int minCharsInWord = isLL ? wordSize : wordSize / 2; + + load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + + BLOCK_COMMENT("string_compare {"); + + // Bizzarely, the counts are passed in bytes, regardless of whether they + // are L or U strings, however the result is always in characters. + if (!str1_isL) { + sraiw(cnt1, cnt1, 1); + } + if (!str2_isL) { + sraiw(cnt2, cnt2, 1); + } + + // Compute the minimum of the string lengths and save the difference in result. 
+ sub(result, cnt1, cnt2); + bgt(cnt1, cnt2, L); + mv(cnt2, cnt1); + bind(L); + + // A very short string + li(t0, minCharsInWord); + ble(cnt2, t0, SHORT_STRING); + + // Compare longwords + // load first parts of strings and finish initialization while loading + { + if (str1_isL == str2_isL) { // LL or UU + // load 8 bytes once to compare + ld(tmp1, Address(str1)); + beq(str1, str2, DONE); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + sub(cnt2, cnt2, minCharsInWord); + beqz(cnt2, TAIL_CHECK); + // convert cnt2 from characters to bytes + if (!str1_isL) { + slli(cnt2, cnt2, 1); + } + add(str2, str2, cnt2); + add(str1, str1, cnt2); + sub(cnt2, zr, cnt2); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + add(str1, str1, cnt2); + sub(cnt1, zr, cnt2); + slli(cnt2, cnt2, 1); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 4); + } else { // UL case + ld(tmp1, Address(str1)); + lwu(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + slli(t0, cnt2, 1); + sub(cnt1, zr, t0); + add(str1, str1, t0); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 8); + } + addi(cnt2, cnt2, isUL ? 
4 : 8); + bgez(cnt2, TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + + // main loop + bind(NEXT_WORD); + if (str1_isL == str2_isL) { // LL or UU + add(t0, str1, cnt2); + ld(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt2, cnt2, 8); + } else if (isLU) { // LU case + add(t0, str1, cnt1); + lwu(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt1, cnt1, 4); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + addi(cnt2, cnt2, 8); + } else { // UL case + add(t0, str2, cnt2); + lwu(tmp2, Address(t0)); + add(t0, str1, cnt1); + ld(tmp1, Address(t0)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + addi(cnt1, cnt1, 8); + addi(cnt2, cnt2, 4); + } + bgez(cnt2, TAIL); + + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, NEXT_WORD); + j(DIFFERENCE); + bind(TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + if (str1_isL == str2_isL) { // LL or UU + ld(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + } else { // UL case + lwu(tmp2, Address(str2)); + ld(tmp1, Address(str1)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + } + bind(TAIL_CHECK); + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, DONE); + + // Find the first different characters in the longwords and + // compute their difference. 
+ bind(DIFFERENCE); + ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb + srl(tmp1, tmp1, result); + srl(tmp2, tmp2, result); + if (isLL) { + andi(tmp1, tmp1, 0xFF); + andi(tmp2, tmp2, 0xFF); + } else { + andi(tmp1, tmp1, 0xFFFF); + andi(tmp2, tmp2, 0xFFFF); + } + sub(result, tmp1, tmp2); + j(DONE); + } + + bind(STUB); + RuntimeAddress stub = NULL; + switch (ae) { + case StrIntrinsicNode::LL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); + break; + case StrIntrinsicNode::UU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); + break; + case StrIntrinsicNode::LU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); + break; + case StrIntrinsicNode::UL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); + break; + default: + ShouldNotReachHere(); + } + assert(stub.target() != NULL, "compare_long_string stub has not been generated"); + trampoline_call(stub); + j(DONE); + + bind(SHORT_STRING); + // Is the minimum length zero? 
+ beqz(cnt2, DONE); + // arrange code to do most branches while loading and loading next characters + // while comparing previous + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + j(SHORT_LOOP_START); + bind(SHORT_LOOP); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST); + bind(SHORT_LOOP_START); + (this->*str1_load_chr)(tmp2, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(t0, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bne(tmp1, cnt1, SHORT_LOOP_TAIL); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST2); + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + beq(tmp2, t0, SHORT_LOOP); + sub(result, tmp2, t0); + j(DONE); + bind(SHORT_LOOP_TAIL); + sub(result, tmp1, cnt1); + j(DONE); + bind(SHORT_LAST2); + beq(tmp2, t0, DONE); + sub(result, tmp2, t0); + + j(DONE); + bind(SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bind(SHORT_LAST); + beq(tmp1, cnt1, DONE); + sub(result, tmp1, cnt1); + + bind(DONE); + + BLOCK_COMMENT("} string_compare"); +} + +void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register result, + Register cnt1, int elem_size) { + Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; + Register tmp1 = t0; + Register tmp2 = t1; + Register cnt2 = tmp2; // cnt2 only used in array length compare + Register elem_per_word = tmp6; + int log_elem_size = exact_log2(elem_size); + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); + + assert(elem_size == 1 || elem_size == 2, "must be char or byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); + li(elem_per_word, wordSize / elem_size); + + BLOCK_COMMENT("arrays_equals {"); + + // if (a1 == a2), return true + beq(a1, a2, SAME); + + mv(result, false); + beqz(a1, DONE); + beqz(a2, DONE); + lwu(cnt1, Address(a1, length_offset)); + lwu(cnt2, Address(a2, length_offset)); + bne(cnt2, cnt1, DONE); + beqz(cnt1, SAME); + + slli(tmp5, cnt1, 3 + log_elem_size); + sub(tmp5, zr, tmp5); + add(a1, a1, base_offset); + add(a2, a2, base_offset); + ld(tmp3, Address(a1, 0)); + ld(tmp4, Address(a2, 0)); + ble(cnt1, elem_per_word, SHORT); // short or same + + // Main 16 byte comparison loop with 2 exits + bind(NEXT_DWORD); { + ld(tmp1, Address(a1, wordSize)); + ld(tmp2, Address(a2, wordSize)); + sub(cnt1, cnt1, 2 * wordSize / elem_size); + blez(cnt1, TAIL); + bne(tmp3, tmp4, DONE); + ld(tmp3, Address(a1, 2 * wordSize)); + ld(tmp4, Address(a2, 2 * wordSize)); + add(a1, a1, 2 * wordSize); + add(a2, a2, 2 * wordSize); + ble(cnt1, elem_per_word, TAIL2); + } beq(tmp1, tmp2, NEXT_DWORD); + j(DONE); + + bind(TAIL); + xorr(tmp4, tmp3, tmp4); + xorr(tmp2, tmp1, tmp2); + sll(tmp2, tmp2, tmp5); + orr(tmp5, tmp4, tmp2); + j(IS_TMP5_ZR); + + bind(TAIL2); + bne(tmp1, tmp2, DONE); + + bind(SHORT); + xorr(tmp4, tmp3, tmp4); + sll(tmp5, tmp4, tmp5); + + bind(IS_TMP5_ZR); + bnez(tmp5, DONE); + + bind(SAME); + mv(result, true); + // That's it. + bind(DONE); + + BLOCK_COMMENT("} array_equals"); +} + +// Compare Strings - Register first = tmp3; +// For Strings we're passed the address of the first characters in a1 +// and a2 and the length in cnt1. +// elem_size is the element size in bytes: either 1 or 2. +// There are two implementations. For arrays >= 8 bytes, all +// comparisons (including the final one, which may overlap) are +// performed 8 bytes at a time. 
For strings < 8 bytes, we compare a +// halfword, then a short, and then a byte. - if (needle_con_cnt == -1) { - Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; +void MacroAssembler::string_equals(Register a1, Register a2, + Register result, Register cnt1, int elem_size) +{ + Label SAME, DONE, SHORT, NEXT_WORD; + Register tmp1 = t0; + Register tmp2 = t1; - sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); - bltz(t0, DOSHORT); + assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1); - (this->*needle_load_1chr)(first, Address(needle), noreg); - slli(t0, needle_len, needle_chr_shift); - add(needle, needle, t0); - neg(nlen_neg, t0); - slli(t0, result_tmp, haystack_chr_shift); - add(haystack, haystack, t0); - neg(hlen_neg, t0); + BLOCK_COMMENT("string_equals {"); - bind(FIRST_LOOP); - add(t0, haystack, hlen_neg); - (this->*haystack_load_1chr)(ch2, Address(t0), noreg); - beq(first, ch2, STR1_LOOP); + mv(result, false); - bind(STR2_NEXT); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, FIRST_LOOP); - j(NOMATCH); + // Check for short strings, i.e. smaller than wordSize. + sub(cnt1, cnt1, wordSize); + bltz(cnt1, SHORT); - bind(STR1_LOOP); - add(nlen_tmp, nlen_neg, needle_chr_size); - add(hlen_tmp, hlen_neg, haystack_chr_size); - bgez(nlen_tmp, MATCH); + // Main 8 byte comparison loop. + bind(NEXT_WORD); { + ld(tmp1, Address(a1, 0)); + add(a1, a1, wordSize); + ld(tmp2, Address(a2, 0)); + add(a2, a2, wordSize); + sub(cnt1, cnt1, wordSize); + bne(tmp1, tmp2, DONE); + } bgtz(cnt1, NEXT_WORD); - bind(STR1_NEXT); - add(ch1, needle, nlen_tmp); - (this->*needle_load_1chr)(ch1, Address(ch1), noreg); - add(ch2, haystack, hlen_tmp); - (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); - bne(ch1, ch2, STR2_NEXT); - add(nlen_tmp, nlen_tmp, needle_chr_size); - add(hlen_tmp, hlen_tmp, haystack_chr_size); - bltz(nlen_tmp, STR1_NEXT); - j(MATCH); + // Last longword. 
In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when + // length == 4. + add(tmp1, a1, cnt1); + ld(tmp1, Address(tmp1, 0)); + add(tmp2, a2, cnt1); + ld(tmp2, Address(tmp2, 0)); + bne(tmp1, tmp2, DONE); + j(SAME); - bind(DOSHORT); - if (needle_isL == haystack_isL) { - sub(t0, needle_len, 2); - bltz(t0, DO1); - bgtz(t0, DO3); - } - } + bind(SHORT); + Label TAIL03, TAIL01; - if (needle_con_cnt == 4) { - Label CH1_LOOP; - (this->*load_4chr)(ch1, Address(needle), noreg); - sub(result_tmp, haystack_len, 4); - slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp - add(haystack, haystack, tmp3); - neg(hlen_neg, tmp3); + // 0-7 bytes left. + andi(t0, cnt1, 4); + beqz(t0, TAIL03); + { + lwu(tmp1, Address(a1, 0)); + add(a1, a1, 4); + lwu(tmp2, Address(a2, 0)); + add(a2, a2, 4); + bne(tmp1, tmp2, DONE); + } - bind(CH1_LOOP); - add(ch2, haystack, hlen_neg); - (this->*load_4chr)(ch2, Address(ch2), noreg); - beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, CH1_LOOP); - j(NOMATCH); + bind(TAIL03); + // 0-3 bytes left. + andi(t0, cnt1, 2); + beqz(t0, TAIL01); + { + lhu(tmp1, Address(a1, 0)); + add(a1, a1, 2); + lhu(tmp2, Address(a2, 0)); + add(a2, a2, 2); + bne(tmp1, tmp2, DONE); } - if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { - Label CH1_LOOP; - BLOCK_COMMENT("string_indexof DO2 {"); - bind(DO2); - (this->*load_2chr)(ch1, Address(needle), noreg); - if (needle_con_cnt == 2) { - sub(result_tmp, haystack_len, 2); + bind(TAIL01); + if (elem_size == 1) { // Only needed when comparing 1-byte elements + // 0-1 bytes left. 
+ andi(t0, cnt1, 1); + beqz(t0, SAME); + { + lbu(tmp1, a1, 0); + lbu(tmp2, a2, 0); + bne(tmp1, tmp2, DONE); } - slli(tmp3, result_tmp, haystack_chr_shift); - add(haystack, haystack, tmp3); - neg(hlen_neg, tmp3); - - bind(CH1_LOOP); - add(tmp3, haystack, hlen_neg); - (this->*load_2chr)(ch2, Address(tmp3), noreg); - beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, CH1_LOOP); - j(NOMATCH); - BLOCK_COMMENT("} string_indexof DO2"); } - if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { - Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; - BLOCK_COMMENT("string_indexof DO3 {"); + // Arrays are equal. + bind(SAME); + mv(result, true); - bind(DO3); - (this->*load_2chr)(first, Address(needle), noreg); - (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); - if (needle_con_cnt == 3) { - sub(result_tmp, haystack_len, 3); - } - slli(hlen_tmp, result_tmp, haystack_chr_shift); - add(haystack, haystack, hlen_tmp); - neg(hlen_neg, hlen_tmp); + // That's it. 
+ bind(DONE); + BLOCK_COMMENT("} string_equals"); +} - bind(FIRST_LOOP); - add(ch2, haystack, hlen_neg); - (this->*load_2chr)(ch2, Address(ch2), noreg); - beq(first, ch2, STR1_LOOP); +typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered); - bind(STR2_NEXT); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, FIRST_LOOP); - j(NOMATCH); +static conditional_branch_insn conditional_branches[] = +{ + /* SHORT branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgt, + NULL, // BoolTest::overflow + (conditional_branch_insn)&Assembler::blt, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::ble, + NULL, // BoolTest::no_overflow + (conditional_branch_insn)&Assembler::bge, - bind(STR1_LOOP); - add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); - add(ch2, haystack, hlen_tmp); - (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); - bne(ch1, ch2, STR2_NEXT); - j(MATCH); - BLOCK_COMMENT("} string_indexof DO3"); - } + /* UNSIGNED branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgtu, + NULL, + (conditional_branch_insn)&Assembler::bltu, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bleu, + NULL, + (conditional_branch_insn)&Assembler::bgeu +}; - if (needle_con_cnt == -1 || needle_con_cnt == 1) { - Label DO1_LOOP; +static float_conditional_branch_insn float_conditional_branches[] = +{ + /* FLOAT SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::float_beq, + (float_conditional_branch_insn)&MacroAssembler::float_bgt, + NULL, // BoolTest::overflow + (float_conditional_branch_insn)&MacroAssembler::float_blt, + (float_conditional_branch_insn)&MacroAssembler::float_bne, + 
(float_conditional_branch_insn)&MacroAssembler::float_ble, + NULL, // BoolTest::no_overflow + (float_conditional_branch_insn)&MacroAssembler::float_bge, - BLOCK_COMMENT("string_indexof DO1 {"); - bind(DO1); - (this->*needle_load_1chr)(ch1, Address(needle), noreg); - sub(result_tmp, haystack_len, 1); - mv(tmp3, result_tmp); - if (haystack_chr_shift) { - slli(tmp3, result_tmp, haystack_chr_shift); - } - add(haystack, haystack, tmp3); - neg(hlen_neg, tmp3); + /* DOUBLE SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::double_beq, + (float_conditional_branch_insn)&MacroAssembler::double_bgt, + NULL, + (float_conditional_branch_insn)&MacroAssembler::double_blt, + (float_conditional_branch_insn)&MacroAssembler::double_bne, + (float_conditional_branch_insn)&MacroAssembler::double_ble, + NULL, + (float_conditional_branch_insn)&MacroAssembler::double_bge +}; - bind(DO1_LOOP); - add(tmp3, haystack, hlen_neg); - (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); - beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, DO1_LOOP); - BLOCK_COMMENT("} string_indexof DO1"); - } +void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), + "invalid conditional branch index"); + (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} - bind(NOMATCH); - mv(result, -1); - j(DONE); +// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use +// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), + "invalid float conditional branch index"); + int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); + (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, + (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +} - bind(MATCH); - srai(t0, hlen_neg, haystack_chr_shift); - add(result, result_tmp, t0); +void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + case BoolTest::le: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + case BoolTest::gt: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} - bind(DONE); +void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } } void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { @@ -5140,3 +5898,4 @@ void MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, } #endif // COMPILER2 + diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index e1474e49276..cb62bb0d875 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP #include "asm/assembler.hpp" +#include "metaprogramming/enableIf.hpp" // MacroAssembler extends Assembler by frequently used macros. // @@ -44,21 +45,52 @@ class MacroAssembler: public Assembler { void safepoint_poll(Label& slow_path); void safepoint_poll_acquire(Label& slow_path); + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg is killed. + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL, + Register flag = noreg); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); + + // Helper functions for statistics gathering. + // Unconditional atomic increment. + void atomic_incw(Register counter_addr, Register tmp); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { + la(tmp1, counter_addr); + atomic_incw(tmp1, tmp2); + } + + // Place a fence.i after code may have been modified due to a safepoint. 
+ void safepoint_ifence(); + // Alignment - void align(int modulus); + void align(int modulus, int extra_offset = 0); // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP + // because signal based thread suspend/resume could happen asynchronously. void enter() { addi(sp, sp, - 2 * wordSize); - sd(lr, Address(sp, wordSize)); + sd(ra, Address(sp, wordSize)); sd(fp, Address(sp)); - mv(fp, sp); + addi(fp, sp, 2 * wordSize); } void leave() { - mv(sp, fp); + addi(sp, fp, - 2 * wordSize); ld(fp, Address(sp)); - ld(lr, Address(sp, wordSize)); + ld(ra, Address(sp, wordSize)); addi(sp, sp, 2 * wordSize); } @@ -133,9 +165,9 @@ class MacroAssembler: public Assembler { void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); // last Java Frame (fills frame anchor) - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp); - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register temp); - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc,Register temp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); // thread in the default location (xthread) void reset_last_Java_frame(bool clear_fp); @@ -216,6 +248,8 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. 
void store_heap_oop_null(Address dst); + void load_prototype_header(Register dst, Register src); + // This dummy is to prevent a call to store_heap_oop from // converting a zero (linke NULL) into a Register by giving // the compiler two choices it can't resolve @@ -231,6 +265,7 @@ class MacroAssembler: public Assembler { virtual void null_check(Register reg, int offset = -1); static bool needs_explicit_null_check(intptr_t offset); + static bool uses_implicit_null_check(void* address); // idiv variant which deals with MINLONG as dividend and -1 as divisor int corrected_idivl(Register result, Register rs1, Register rs2, @@ -243,7 +278,7 @@ class MacroAssembler: public Assembler { Register intf_klass, RegisterOrConstant itable_index, Register method_result, - Register scan_temp, + Register scan_tmp, Label& no_such_interface, bool return_method = true); @@ -260,21 +295,22 @@ class MacroAssembler: public Assembler { Address form_address(Register Rd, Register base, long byte_offset); // allocation - void eden_allocate( + void tlab_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time Register tmp1, // temp register - Label& slow_case, // continuation point if fast allocation fails + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails bool is_far = false ); - void tlab_allocate( + + void eden_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time - Register tmp1, // temp register - Register tmp2, // temp register - Label& slow_case, // continuation point of fast allocation fails + Register tmp, // temp register + Label& slow_case, //
continuation point if fast allocation fails bool is_far = false ); @@ -283,10 +319,10 @@ class MacroAssembler: public Assembler { // The fast path produces a tri-state answer: yes / no / maybe-slow. // One of the three labels can be NULL, meaning take the fall-through. // If super_check_offset is -1, the value is loaded up from super_klass. - // No registers are killed, except temp_reg + // No registers are killed, except tmp_reg void check_klass_subtype_fast_path(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label* L_success, Label* L_failure, Label* L_slow_path, @@ -294,18 +330,18 @@ class MacroAssembler: public Assembler { // The reset of the type cehck; must be wired to a corresponding fast path. // It does not repeat the fast path logic, so don't use it standalone. - // The temp_reg and temp2_reg can be noreg, if no temps are avaliable. + // The tmp1_reg and tmp2_reg can be noreg, if no temps are available. // Updates the sub's secondary super cache as necessary. void check_klass_subtype_slow_path(Register sub_klass, Register super_klass, - Register temp_reg, - Register temp2_reg, + Register tmp1_reg, + Register tmp2_reg, Label* L_success, Label* L_failure); void check_klass_subtype(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label& L_success); Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); @@ -386,17 +422,11 @@ class MacroAssembler: public Assembler { void should_not_reach_here() { stop("should not reach here"); } - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - return RegisterOrConstant(tmp); - } - static address target_addr_for_insn(address insn_addr); // Required platform-specific helpers for Label::patch_instructions. // They _shadow_ the declarations in AbstractAssembler, which are undefined.
- static int pd_patch_instruction_size(address branch, address target) ; + static int pd_patch_instruction_size(address branch, address target); static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { pd_patch_instruction_size(branch, target); } @@ -423,16 +453,14 @@ class MacroAssembler: public Assembler { void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); public: - // enum used for riscv64--x86 linkage to define return type of x86 function - enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; - // Standard pseudoinstruction void nop(); - void mv(Register Rd, Register Rs) ; + void mv(Register Rd, Register Rs); void notr(Register Rd, Register Rs); void neg(Register Rd, Register Rs); void negw(Register Rd, Register Rs); - void sext_w(Register Rd, Register Rs); // mv Rd[31:0], Rs[31:0] + void sext_w(Register Rd, Register Rs); + void zext_b(Register Rd, Register Rs); void seqz(Register Rd, Register Rs); // set if = zero void snez(Register Rd, Register Rs); // set if != zero void sltz(Register Rd, Register Rs); // set if < zero @@ -508,8 +536,6 @@ class MacroAssembler: public Assembler { void pop_reg(Register Rd); int push_reg(unsigned int bitset, Register stack); int pop_reg(unsigned int bitset, Register stack); - void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } - void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } // Push and pop everything that might be clobbered by a native // runtime call except t0 and t1. 
(They are always @@ -540,11 +566,17 @@ class MacroAssembler: public Assembler { } // mv - void mv(Register Rd, int64_t imm64); - void mv(Register Rd, int imm); - void mvw(Register Rd, int32_t imm32); + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + void mv(Register Rd, Address dest); - void mv(Register Rd, address addr); + void mv(Register Rd, address dest); void mv(Register Rd, RegisterOrConstant src); // logic @@ -552,28 +584,22 @@ class MacroAssembler: public Assembler { void orrw(Register Rd, Register Rs1, Register Rs2); void xorrw(Register Rd, Register Rs1, Register Rs2); - // grev - void reverseb16(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse bytes in 16-bit and move to lower - void reverseh32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse half-words in 32-bit and move to lower - void grevh(Register Rd, Register Rs, Register Rtmp = t0); // basic reverse bytes in 16-bit halfwords, sign-extend - void grev16w(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes in 16-bit halfwords(32), sign-extend - void grevw(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes(32), sign-extend - void grev16(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse bytes in 16-bit halfwords - void grev32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse bytes in 32-bit words - void grev(Register Rd, Register Rs, Register Rtmp1 = t0, 
Register Rtmp2 = t1); // reverse bytes in 64-bit double-words - void grevhu(Register Rd, Register Rs, Register Rtmp = t0); // basic reverse bytes in 16-bit halfwords, zero-extend - void grev16wu(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes in 16-bit halfwords(32), zero-extend - void grevwu(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes(32), zero-extend - - - void andi(Register Rd, Register Rn, int64_t increment, Register temp = t0); + // revb + void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend + void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend + void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend + void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend + void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower + void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword + void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word + void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + + void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); + void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); - // Support for serializing memory accesses between threads - void serialize_memory(Register thread, Register tmp1, Register tmp2); - void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, 
Label *fail); - void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); void cmpxchg(Register addr, Register expected, Register new_val, enum operand_size size, @@ -613,35 +639,10 @@ class MacroAssembler: public Assembler { void atomic_xchgwu(Register prev, Register newv, Register addr); void atomic_xchgalwu(Register prev, Register newv, Register addr); - // Biased locking support - // lock_reg and obj_reg must be loaded up with the appropriate values. - // swap_reg is killed. - // tmp_reg must be supplied and must not be t0 or t1 - // Optional slow case is for implementations (interpreter and C1) which branch to - // slow case directly. Leaves condition codes set for C2's Fast_Lock node. - // Returns offset of first potentially-faulting instruction for null - // check info (currently consumed only by C1). If - // swap_reg_contains_mark is true then returns -1 as it is assumed - // the calling code has already passed any potential faults. - int biased_locking_enter(Register lock_reg, Register obj_reg, - Register swap_reg, Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, Label* slow_case = NULL, - BiasedLockingCounters* counters = NULL, - Register flag = noreg); - void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); - static bool far_branches() { return ReservedCodeCacheSize > branch_range; } - //atomic - void atomic_incw(Register counter_addr, Register tmp1); - void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { - la(tmp1, counter_addr); - atomic_incw(tmp1, tmp2); - } - // Jumps that can reach anywhere in the code cache. // Trashes tmp. 
void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); @@ -660,8 +661,8 @@ class MacroAssembler: public Assembler { void bang_stack_with_offset(int offset) { // stack grows down, caller passes positive offset assert(offset > 0, "must bang with negative offset"); - sub(t1, sp, offset); - sd(zr, Address(t1)); + sub(t0, sp, offset); + sd(zr, Address(t0)); } void la_patchable(Register reg1, const Address &dest, int32_t &offset); @@ -672,53 +673,19 @@ class MacroAssembler: public Assembler { #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) -#ifdef COMPILER2 - void spill(Register r, bool is64, int offset) { - is64 ? sd(r, Address(sp, offset)) - : sw(r, Address(sp, offset)); - } - - void spill(FloatRegister f, bool is64, int offset) { - is64 ? fsd(f, Address(sp, offset)) - : fsw(f, Address(sp, offset)); - } - - void unspill(Register r, bool is64, int offset) { - is64 ? ld(r, Address(sp, offset)) - : lw(r, Address(sp, offset)); - } - - void unspillu(Register r, bool is64, int offset) { - is64 ? ld(r, Address(sp, offset)) - : lwu(r, Address(sp, offset)); - } - - void unspill(FloatRegister f, bool is64, int offset) { - is64 ? fld(f, Address(sp, offset)) - : flw(f, Address(sp, offset)); - } -#endif // COMPILER2 - - void clear_upper_bits(Register r, unsigned upper_bits) { - assert(upper_bits < 64, "bit count to clear must be less than 64"); - - int sig_bits = 64 - upper_bits; // significance bits - if (sig_bits < 12) { - andi(r, r, (1UL << sig_bits) - 1); - } else { - zero_ext(r, r, upper_bits); - } - } - // Frame creation and destruction shared between JITs. 
void build_frame(int framesize); void remove_frame(int framesize); void reserved_stack_check(); + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); - address read_polling_page(Register r, address page, relocInfo::relocType rtype); - address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); address ic_call(address entry, jint method_index = 0); @@ -728,79 +695,61 @@ class MacroAssembler: public Assembler { void cmpptr(Register src1, Address src2, Label& equal); - void load_method_holder(Register holder, Register method); - - void oop_beq(Register obj1, Register obj2, Label& L_equal, bool is_far = false); - void oop_bne(Register obj1, Register obj2, Label& L_nequal, bool is_far = false); - -#ifdef COMPILER2 - void arrays_equals(Register r1, Register r2, - Register tmp3, Register tmp4, - Register tmp5, Register tmp6, - Register result, Register cnt1, - int elem_size); - - void string_equals(Register r1, Register r2, - Register result, Register cnt1, - int elem_size); - - void string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, - Register tmp1, Register tmp2, Register tmp3, - int ae); -#endif - - void compute_index(Register str1, Register trailing_zero, Register match_mask, + void compute_index(Register str1, Register trailing_zeros, Register match_mask, Register result, Register char_tmp, Register tmp, bool haystack_isL); void compute_match_mask(Register src, Register pattern, Register match_mask, Register mask1, Register mask2); - void inflate_lo32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); - void 
inflate_hi32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); +#ifdef COMPILER2 + void mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp); + void cad(Register dst, Register src1, Register src2, Register carry); + void cadc(Register dst, Register src1, Register src2, Register carry); + void adc(Register dst, Register src1, Register src2, Register carry); + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry); + void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi); + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi); +#endif + + void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); - void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register Rtmp1 = t0, Register Rtmp2 = t1); + void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); void zero_words(Register base, u_int64_t cnt); address zero_words(Register ptr, Register cnt); void fill_words(Register base, Register cnt, Register value); - void zero_memory(Register addr, Register len, Register tmp1); 
+ void zero_memory(Register addr, Register len, Register tmp); // shift left by shamt and add void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); -#ifdef COMPILER2 - // refer to conditional_branches and float_conditional_branches - static const int bool_test_bits = 3; - static const int neg_cond_bits = 2; - static const int unsigned_branch_mask = 1 << bool_test_bits; - static const int double_branch_mask = 1 << bool_test_bits; - - // cmp - void cmp_branch(int cmpFlag, - Register op1, Register op2, - Label& label, bool is_far = false); - - void float_cmp_branch(int cmpFlag, - FloatRegister op1, FloatRegister op2, - Label& label, bool is_far = false); - - void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, - Label& L, bool is_far = false); - - void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, - Label& L, bool is_far = false); - -#endif - // Here the float instructions with safe deal with some exceptions. // e.g. convert from NaN, +Inf, -Inf to int, float, double // will trigger exception, we need to deal with these situations // to get correct results. 
- void fcvt_w_s_safe(Register dst, FloatRegister src, Register temp = t0); - void fcvt_l_s_safe(Register dst, FloatRegister src, Register temp = t0); - void fcvt_w_d_safe(Register dst, FloatRegister src, Register temp = t0); - void fcvt_l_d_safe(Register dst, FloatRegister src, Register temp = t0); + void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); // vector load/store unit-stride instructions void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { @@ -847,13 +796,13 @@ class MacroAssembler: public Assembler { sltu(Rt, zr, Rt); break; case T_CHAR : - zero_ext(Rt, Rt, registerSize - 16); + zero_extend(Rt, Rt, 16); break; case T_BYTE : - sign_ext(Rt, Rt, registerSize - 8); + sign_extend(Rt, Rt, 8); break; case T_SHORT : - sign_ext(Rt, Rt, registerSize - 16); + sign_extend(Rt, Rt, 16); break; case T_INT : addw(Rt, Rt, zr); @@ -871,8 +820,8 @@ class MacroAssembler: public Assembler { void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); // Zero/Sign-extend - void zero_ext(Register dst, Register src, int clear_bits); - void sign_ext(Register dst, Register src, int clear_bits); + void zero_extend(Register dst, Register src, int bits); + void sign_extend(Register dst, Register src, int bits); // compare src1 and src2 and get -1/0/1 in dst. 
// if [src1 > src2], dst = 1; @@ -888,7 +837,57 @@ class MacroAssembler: public Assembler { void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); void vfneg_v(VectorRegister vd, VectorRegister vs); -#ifdef COMPILER2 +private: + +#ifdef ASSERT + // Macro short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). +#define reset_labels1(L1) L1.reset() +#define reset_labels2(L1, L2) L1.reset(); L2.reset() +#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) +#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) +#endif + void repne_scan(Register addr, Register value, Register count, Register tmp); + + // Return true if an address is within the 48-bit RISCV64 address space. + bool is_valid_riscv64_address(address addr) { + // sv48: must have bits 63–48 all equal to bit 47 + return ((uintptr_t)addr >> 47) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { + if (NearCpool) { + ld(dest, const_addr); + } else { + int32_t offset = 0; + la_patchable(dest, InternalAddress(const_addr.target()), offset); + ld(dest, Address(dest, offset)); + } + } + + int bitset_to_regs(unsigned int bitset, unsigned char* regs); + Address add_memory_helper(const Address dst); + + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + +public: + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1, Register tmp2, Register tmp3, + int ae); + + void string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL); + + void string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register 
tmp3, Register tmp4, + bool isL); + void string_indexof(Register str1, Register str2, Register cnt1, Register cnt2, Register tmp1, Register tmp2, @@ -902,20 +901,71 @@ class MacroAssembler: public Assembler { Register tmp3, Register tmp4, int needle_con_cnt, Register result, int ae); + void arrays_equals(Register r1, Register r2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, Register cnt1, + int elem_size); + + void string_equals(Register r1, Register r2, + Register result, Register cnt1, + int elem_size); + + // refer to conditional_branches and float_conditional_branches + static const int bool_test_bits = 3; + static const int neg_cond_bits = 2; + static const int unsigned_branch_mask = 1 << bool_test_bits; + static const int double_branch_mask = 1 << bool_test_bits; + + // cmp + void cmp_branch(int cmpFlag, + Register op1, Register op2, + Label& label, bool is_far = false); + + void float_cmp_branch(int cmpFlag, + FloatRegister op1, FloatRegister op2, + Label& label, bool is_far = false); + + void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, + Label& L, bool is_far = false); + + void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, + Label& L, bool is_far = false); void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); - void minmax_FD(FloatRegister dst, - FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min); + void spill(Register r, bool is64, int offset) { + is64 ? sd(r, Address(sp, offset)) + : sw(r, Address(sp, offset)); + } + + void spill(FloatRegister f, bool is64, int offset) { + is64 ? fsd(f, Address(sp, offset)) + : fsw(f, Address(sp, offset)); + } void spill(VectorRegister v, int offset) { add(t0, sp, offset); vs1r_v(v, t0); } + void unspill(Register r, bool is64, int offset) { + is64 ? ld(r, Address(sp, offset)) + : lw(r, Address(sp, offset)); + } + + void unspillu(Register r, bool is64, int offset) { + is64 ? 
ld(r, Address(sp, offset)) + : lwu(r, Address(sp, offset)); + } + + void unspill(FloatRegister f, bool is64, int offset) { + is64 ? fld(f, Address(sp, offset)) + : flw(f, Address(sp, offset)); + } + void unspill(VectorRegister v, int offset) { add(t0, sp, offset); vl1r_v(v, t0); @@ -927,6 +977,10 @@ class MacroAssembler: public Assembler { spill(v0, dst_offset); } + void minmax_FD(FloatRegister dst, + FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min); + private: void element_compare(Register r1, Register r2, Register result, Register cnt, @@ -972,45 +1026,6 @@ class MacroAssembler: public Assembler { VectorRegister tmp1, VectorRegister tmp2, bool is_double, bool is_min); -#endif // COMPILER2 - -private: - -#ifdef ASSERT - // Template short-hand support to clean-up after a failed call to trampoline - // call generation (see trampoline_call() below), when a set of Labels must - // be reset (before returning). - #define reset_labels1(L1) L1.reset() - #define reset_labels2(L1, L2) L1.reset(); L2.reset() - #define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) - #define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) -#endif - void load_prototype_header(Register dst, Register src); - void repne_scan(Register addr, Register value, Register count, Register temp); - - // Return true if an addres is within the 48-bit Riscv64 address - // space. 
- bool is_valid_riscv64_address(address addr) { - // sv48: must have bits 63–48 all equal to bit 47 - return ((uintptr_t)addr >> 47) == 0; - } - - void ld_constant(Register dest, const Address &const_addr) { - if (NearCpool) { - ld(dest, const_addr); - } else { - int32_t offset = 0; - la_patchable(dest, InternalAddress(const_addr.target()), offset); - ld(dest, Address(dest, offset)); - } - } - - int bitset_to_fregs(unsigned int bitset, unsigned char* regs); - int bitset_to_regs(unsigned int bitset, unsigned char* regs); - Address add_memory_helper(const Address dst); - - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); }; #ifdef ASSERT @@ -1034,4 +1049,5 @@ class SkipIfEqual { SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ~SkipIfEqual(); }; + #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp index ced89faef1d..ef968ccd96d 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,4 +26,6 @@ #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP #define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +// Still empty. 
+ #endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp deleted file mode 100644 index e32cce0a32a..00000000000 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_RISCV_MATCHER_RISCV_HPP -#define CPU_RISCV_MATCHER_RISCV_HPP - - // Defined within class Matcher - - // false => size gets scaled to BytesPerLong, ok. - static const bool init_array_count_is_in_bytes = false; - - // riscv64 supports misaligned vectors store/load. - static constexpr bool misaligned_vectors_ok() { - return false; - } - - // Whether code generation need accurate ConvI2L types. - static const bool convi2l_type_required = false; - - // Does the CPU require late expand (see block.cpp for description of late expand)? 
- static const bool require_postalloc_expand = false; - - // Do we need to mask the count passed to shift instructions or does - // the cpu only look at the lower 5/6 bits anyway? - static const bool need_masked_shift_count = false; - - // No support for generic vector operands. - static const bool supports_generic_vector_operands = false; - - static constexpr bool isSimpleConstant64(jlong value) { - // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. - // Probably always true, even if a temp register is required. - return true; - } - - // Use conditional move (CMOVL) - static constexpr int long_cmove_cost() { - // long cmoves are no more expensive than int cmoves - return 0; - } - - static constexpr int float_cmove_cost() { - // float cmoves are no more expensive than int cmoves - return 0; - } - - // This affects two different things: - // - how Decode nodes are matched - // - how ImplicitNullCheck opportunities are recognized - // If true, the matcher will try to remove all Decodes and match them - // (as operands) into nodes. NullChecks are not prepared to deal with - // Decodes by final_graph_reshaping(). - // If false, final_graph_reshaping() forces the decode behind the Cmp - // for a NullCheck. The matcher matches the Decode node into a register. - // Implicit_null_check optimization moves the Decode along with the - // memory operation back up before the NullCheck. - static bool narrow_oop_use_complex_address() { - return Universe::narrow_oop_shift() == 0; - } - - static bool narrow_klass_use_complex_address() { - return false; - } - - static bool const_oop_prefer_decode() { - // Prefer ConN+DecodeN over ConP in simple compressed oops mode. - return Universe::narrow_oop_base() == NULL; - } - - static bool const_klass_prefer_decode() { - // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. - return Universe::narrow_klass_base() == NULL; - } - - // Is it better to copy float constants, or load them directly from - // memory? 
Intel can load a float constant from a direct address, - // requiring no extra registers. Most RISCs will have to materialize - // an address into a register first, so they would do better to copy - // the constant from stack. - static const bool rematerialize_float_constants = false; - - // If CPU can load and store mis-aligned doubles directly then no - // fixup is needed. Else we split the double into 2 integer pieces - // and move it piece-by-piece. Only happens when passing doubles into - // C code as the Java calling convention forces doubles to be aligned. - static const bool misaligned_doubles_ok = true; - - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. - static const bool strict_fp_requires_explicit_rounding = false; - - // Are floats converted to double when stored to stack during - // deoptimization? - static constexpr bool float_in_double() { return false; } - - // Do ints take an entire long register or just half? - // The relevant question is how the int is callee-saved: - // the whole long is written but de-opt'ing will have to extract - // the relevant 32 bits. - static const bool int_in_long = true; - - // Does the CPU supports vector variable shift instructions? - static constexpr bool supports_vector_variable_shifts(void) { - return false; - } - - // Does the CPU supports vector variable rotate instructions? 
- static constexpr bool supports_vector_variable_rotates(void) { - return false; - } - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void) { - return false; - } - - // true means we have fast l2f convers - // false means that conversion is done by runtime call - static constexpr bool convL2FSupported(void) { - return true; - } - -#endif // CPU_RISCV_MATCHER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp index 26eedd09b1d..fd907f77afb 100644 --- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,7 @@ #include "prims/methodHandles.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" #define __ _masm-> @@ -91,7 +92,7 @@ void MethodHandles::verify_klass(MacroAssembler* _masm, BLOCK_COMMENT("} verify_klass"); } -void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} #endif //ASSERT @@ -154,7 +155,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, sizeof(u2), /*is_signed*/ false); Label L; __ ld(t0, __ argument_address(temp2, -1)); - __ oop_beq(recv, t0, L); + __ beq(recv, t0, L); __ ld(x10, __ argument_address(temp2, -1)); __ ebreak(); __ BIND(L); @@ -181,8 +182,9 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) // xmethod: Method* - // x13: argument locator (parameter slot count, added to rsp) + // x13: argument locator (parameter slot count, added to sp) // x11: used as temp to hold mh or receiver + // x10, x29: garbage temps, blown away Register argp = x13; // argument list ptr, live on error paths Register mh = x11; // MH receiver; dies quickly and is recycled @@ -232,7 +234,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* trace_method_handle_interpreter_entry(_masm, iid); if (iid == vmIntrinsics::_invokeBasic) { generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); - } else { // Adjust argument list by popping the trailing MemberName argument. 
Register recv = noreg; @@ -430,7 +431,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, #ifndef PRODUCT void trace_method_handle_stub(const char* adaptername, - oop mh, + oopDesc* mh, intptr_t* saved_regs, intptr_t* entry_sp) { } diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp index 48e9554635d..65493eba764 100644 --- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp index 8520b41aedf..6bd0cb997dd 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -235,24 +235,20 @@ void NativeMovConstReg::set_data(intptr_t x) { // Find and replace the oop/metadata corresponding to this // instruction in oops section. 
CodeBlob* cb = CodeCache::find_blob(instruction_address()); - if(cb != NULL) { - nmethod* nm = cb->as_nmethod_or_null(); - if (nm != NULL) { - RelocIterator iter(nm, instruction_address(), next_instruction_address()); - while (iter.next()) { - if (iter.type() == relocInfo::oop_type) { - oop* oop_addr = iter.oop_reloc()->oop_addr(); - *oop_addr = cast_to_oop(x); - break; - } else if (iter.type() == relocInfo::metadata_type) { - Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); - *metadata_addr = (Metadata*)x; - break; - } + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(x); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)x; + break; } } - } else { - ShouldNotReachHere(); } } @@ -326,10 +322,9 @@ bool NativeInstruction::is_safepoint_poll() { bool NativeInstruction::is_lwu_to_zr(address instr) { assert_cond(instr != NULL); - unsigned insn = *(unsigned*)instr; - return (Assembler::extract(insn, 6, 0) == 0b0000011 && - Assembler::extract(insn, 14, 12) == 0b110 && - Assembler::extract(insn, 11, 7) == 0b00000); // zr + return (extract_opcode(instr) == 0b0000011 && + extract_funct3(instr) == 0b110 && + extract_rd(instr) == zr); // zr } // A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. @@ -358,7 +353,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), - "riscv64 cannot replace non-jump with jump"); + "riscv cannot replace non-jump with jump"); // Patch this nmethod atomically. 
if (Assembler::reachable_from_branch_at(verified_entry, dest)) { @@ -384,8 +379,6 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add } void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { - NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; - CodeBuffer cb(code_pos, instruction_size); MacroAssembler a(&cb); @@ -434,4 +427,3 @@ void NativeMembar::set_kind(uint32_t order_kind) { address membar = addr_at(0); *(unsigned int*) membar = insn; } - diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index eab5467aab2..781df54e0b8 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -54,6 +54,7 @@ class NativeInstruction { public: enum { instruction_size = 4, + compressed_instruction_size = 2, }; juint encoding() const { @@ -65,35 +66,25 @@ class NativeInstruction { bool is_call() const { return is_call_at(addr_at(0)); } bool is_jump() const { return is_jump_at(addr_at(0)); } - static bool is_jal_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1101111; } - static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100111 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } - static bool is_branch_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100011; } - static bool is_ld_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b011); } - static bool is_load_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011; } + static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } + static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } + static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } + static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } + static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } static bool is_store_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0100011; } static bool is_float_load_at(address instr) { 
assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } - static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010111; } - static bool is_jump_at(address instr) { assert_cond(instr != NULL); return (is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr)); } - static bool is_addi_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } - static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0011011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } - static bool is_lui_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0110111; } + static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } + static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } + static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } + static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } + static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } static bool is_slli_shift_at(address instr, uint32_t shift) { assert_cond(instr != NULL); - return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && // opcode field - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b001 && // funct3 field, select the type of operation + return (extract_opcode(instr) == 0b0010011 && // opcode field + extract_funct3(instr) == 0b001 && // funct3 field, select 
the type of operation Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field } - // return true if the (index1~index2) field of instr1 is equal to (index3~index4) field of instr2, otherwise false - static bool compare_instr_field(address instr1, int index1, int index2, address instr2, int index3, int index4) { - assert_cond(instr1 != NULL && instr2 != NULL); - return Assembler::extract(((unsigned*)instr1)[0], index1, index2) == Assembler::extract(((unsigned*)instr2)[0], index3, index4); - } - static Register extract_rs1(address instr); static Register extract_rs2(address instr); static Register extract_rd(address instr); @@ -109,15 +100,21 @@ class NativeInstruction { // slli // addi/jalr/load static bool check_movptr_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7); // check the rs1 field of addi/jalr/load and the rd field of slli + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instruction_size; + address slli2 
= addi2 + instruction_size; + address last_instr = slli2 + instruction_size; + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(last_instr) == extract_rd(slli2); } // the instruction sequence of li64 is as below: @@ -130,43 +127,60 @@ class NativeInstruction { // slli // addi static bool check_li64_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field fof slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 20, 19, 15, instr + 20, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 24, 19, 15, instr + 20, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 24, 19, 15, instr + 24, 11, 7) && 
// check the rs1 field and the rd field of slli - compare_instr_field(instr + 28, 19, 15, instr + 24, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 28, 19, 15, instr + 28, 11, 7); // check the rs1 field and the rd field of addi + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instruction_size; + address slli2 = addi2 + instruction_size; + address addi3 = slli2 + instruction_size; + address slli3 = addi3 + instruction_size; + address addi4 = slli3 + instruction_size; + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(addi3) == extract_rd(slli2) && + extract_rs1(addi3) == extract_rd(addi3) && + extract_rs1(slli3) == extract_rd(addi3) && + extract_rs1(slli3) == extract_rd(slli3) && + extract_rs1(addi4) == extract_rd(slli3) && + extract_rs1(addi4) == extract_rd(addi4); } // the instruction sequence of li32 is as below: // lui // addiw static bool check_li32_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addiw and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field and the rd field of addiw + address lui = instr; + address addiw = lui + instruction_size; + + return extract_rs1(addiw) == extract_rd(lui) && + extract_rs1(addiw) == extract_rd(addiw); } // the instruction sequence of pc-relative is as below: // auipc // jalr/addi/load/float_load static bool check_pc_relative_data_dependency(address instr) { - return compare_instr_field(instr, 11, 7, instr + 4, 19, 15); // check the rd field of 
auipc and the rs1 field of jalr/addi/load/float_load + address auipc = instr; + address last_instr = auipc + instruction_size; + + return extract_rs1(last_instr) == extract_rd(auipc); } // the instruction sequence of load_label is as below: // auipc // load static bool check_load_pc_relative_data_dependency(address instr) { - return compare_instr_field(instr, 11, 7, instr + 4, 11, 7) && // check the rd field of auipc and the rd field of load - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field of load and the rd field of load + address auipc = instr; + address load = auipc + instruction_size; + + return extract_rd(load) == extract_rd(auipc) && + extract_rs1(load) == extract_rd(load); } static bool is_movptr_at(address instr); @@ -184,10 +198,7 @@ class NativeInstruction { static bool is_lwu_to_zr(address instr); inline bool is_nop(); - inline bool is_illegal(); - inline bool is_return(); inline bool is_jump_or_nop(); - inline bool is_cond_jump(); bool is_safepoint_poll(); bool is_sigill_zombie_not_entrant(); bool is_stop(); @@ -197,7 +208,6 @@ class NativeInstruction { jint int_at(int offset) const { return *(jint*) addr_at(offset); } juint uint_at(int offset) const { return *(juint*) addr_at(offset); } - jushort uint16_at(int offset) const { return *(jushort *) addr_at(offset); } address ptr_at(int offset) const { return *(address*) addr_at(offset); } @@ -218,8 +228,7 @@ class NativeInstruction { } bool is_membar() { - unsigned int insn = uint_at(0); - return (insn & 0x7f) == 0b1111 && Assembler::extract(insn, 14, 12) == 0; + return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; } }; @@ -227,7 +236,7 @@ inline NativeInstruction* nativeInstruction_at(address addr) { return (NativeInstruction*)addr; } -// The natural type of an RISCV64 instruction is uint32_t +// The natural type of an RISCV instruction is uint32_t inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { return (NativeInstruction*)addr; } @@ 
-239,7 +248,7 @@ inline NativeCall* nativeCall_at(address addr); class NativeCall: public NativeInstruction { public: - enum RISCV64_specific_constants { + enum RISCV_specific_constants { instruction_size = 4, instruction_offset = 0, displacement_offset = 0, @@ -251,27 +260,24 @@ class NativeCall: public NativeInstruction { address return_address() const { return addr_at(return_address_offset); } address destination() const; - void set_destination(address dest) { - if (is_jal()) { - intptr_t offset = (intptr_t)(dest - instruction_address()); - assert((offset & 0x1) == 0, "should be aligned"); - assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); - unsigned int insn = 0b1101111; // jal - address pInsn = (address)(&insn); - Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); - Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); - Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); - Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); - Assembler::patch(pInsn, 11, 7, lr->encoding()); // Rd must be x1, need lr - set_int_at(displacement_offset, insn); - return; - } - ShouldNotReachHere(); + void set_destination(address dest) { + assert(is_jal(), "Should be jal instruction!"); + intptr_t offset = (intptr_t)(dest - instruction_address()); + assert((offset & 0x1) == 0, "bad alignment"); + assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); + unsigned int insn = 0b1101111; // jal + address pInsn = (address)(&insn); + Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); + Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); + Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); + Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); + Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra + set_int_at(displacement_offset, insn); } - void verify_alignment() { ; } - void verify(); - void print(); + void verify_alignment() {} // do nothing on riscv + void 
verify(); + void print(); // Creation inline friend NativeCall* nativeCall_at(address addr); @@ -326,8 +332,8 @@ inline NativeCall* nativeCall_before(address return_address) { // (used to manipulate inlined 64-bit data calls, etc.) class NativeMovConstReg: public NativeInstruction { public: - enum RISCV64_specific_constants { - movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + enum RISCV_specific_constants { + movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld instruction_offset = 0, @@ -392,7 +398,7 @@ inline NativeMovConstReg* nativeMovConstReg_before(address addr) { return test; } -// RISCV64 should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. +// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. 
class NativeMovRegMem: public NativeInstruction { public: int instruction_start() const { @@ -430,7 +436,7 @@ inline NativeMovRegMem* nativeMovRegMem_at (address addr) { class NativeJump: public NativeInstruction { public: - enum RISCV64_specific_constants { + enum RISCV_specific_constants { instruction_size = NativeInstruction::instruction_size, instruction_offset = 0, data_offset = 0, @@ -447,9 +453,6 @@ class NativeJump: public NativeInstruction { void verify(); - // Unit testing stuff - static void test() {} - // Insertion of native jump instruction static void insert(address code_pos, address entry); // MT-safe insertion of native jump at verified method entry @@ -467,11 +470,11 @@ inline NativeJump* nativeJump_at(address addr) { class NativeGeneralJump: public NativeJump { public: - enum RISCV64_specific_constants { - instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + enum RISCV_specific_constants { + instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr instruction_offset = 0, data_offset = 0, - normal_next_instruction_offset = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr }; address jump_destination() const; @@ -493,9 +496,8 @@ class NativeIllegalInstruction: public NativeInstruction { static void insert(address code_pos); }; -inline bool NativeInstruction::is_nop() { - address instr_addr = addr_at(0); - uint32_t insn = *(uint32_t*)instr_addr; +inline bool NativeInstruction::is_nop() { + uint32_t insn = *(uint32_t*)addr_at(0); return insn == 0x13; } @@ -507,7 +509,7 @@ inline bool NativeInstruction::is_jump_or_nop() { class NativeCallTrampolineStub : public NativeInstruction { public: - enum RISCV64_specific_constants { + enum RISCV_specific_constants { // Refer to function emit_trampoline_stub. 
instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr @@ -529,11 +531,14 @@ inline bool is_NativeCallTrampolineStub_at(address addr) { // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 // 3). check if the offset in ld[31:20] equals the data_offset assert_cond(addr != NULL); - if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + 4) && NativeInstruction::is_jalr_at(addr + 8) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[0], 11, 7) == x5) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[1], 11, 7) == x5) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[1], 19, 15) == x5) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[2], 19, 15) == x5) && + const int instr_size = NativeInstruction::instruction_size; + if (NativeInstruction::is_auipc_at(addr) && + NativeInstruction::is_ld_at(addr + instr_size) && + NativeInstruction::is_jalr_at(addr + 2 * instr_size) && + (NativeInstruction::extract_rd(addr) == x5) && + (NativeInstruction::extract_rd(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { return true; } diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp index f2b87c01fa3..fef8ca9b64e 100644 --- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp +++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -34,7 +33,6 @@ // This is the hook for finding a register in an "well-known" location, // such as a register block of a predetermined format. address pd_location(VMReg reg) const { return NULL; } - address pd_location(VMReg base_reg, int slot_idx) const; // no PD state to clear or copy: void pd_clear() {} diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp index 61f7f77e793..583f67573ca 100644 --- a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,6 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" -#include "asm/macroAssembler.inline.hpp" #include "asm/register.hpp" #include "interp_masm_riscv.hpp" #include "register_riscv.hpp" @@ -177,7 +176,6 @@ REGISTER_DEFINITION(Register, gp); REGISTER_DEFINITION(Register, tp); REGISTER_DEFINITION(Register, xmethod); REGISTER_DEFINITION(Register, ra); -REGISTER_DEFINITION(Register, lr); REGISTER_DEFINITION(Register, sp); REGISTER_DEFINITION(Register, fp); REGISTER_DEFINITION(Register, xheapbase); diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp index 2e9c88c223a..ef60cb3bb05 100644 --- a/src/hotspot/cpu/riscv/register_riscv.cpp +++ b/src/hotspot/cpu/riscv/register_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -29,18 +28,14 @@ const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; + const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; -const int ConcreteRegisterImpl::max_vpr = - ConcreteRegisterImpl::max_fpr + - VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; - - const char* RegisterImpl::name() const { - const char* names[number_of_registers] = { - "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", + static const char *const names[number_of_registers] = { + "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" @@ -49,7 +44,7 @@ const char* RegisterImpl::name() const { } const char* FloatRegisterImpl::name() const { - const char* names[number_of_registers] = { + static const char *const names[number_of_registers] = { "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", @@ -59,7 +54,7 @@ const char* FloatRegisterImpl::name() const { } const char* VectorRegisterImpl::name() const { - const char* names[number_of_registers] = { + static const char *const names[number_of_registers] = { "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp index fa8b80ef3b9..ca859569bd2 100644 --- a/src/hotspot/cpu/riscv/register_riscv.hpp +++ b/src/hotspot/cpu/riscv/register_riscv.hpp @@ 
-1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,31 +57,49 @@ class RegisterImpl: public AbstractRegisterImpl { public: enum { number_of_registers = 32, - number_of_byte_registers = 32, max_slots_per_register = 2, + + // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable + // for compressed instructions. See Table 17.2 in spec. + compressed_register_base = 8, + compressed_register_top = 15, }; // derived registers, offsets, and addresses - Register successor() const { return as_Register(encoding() + 1); } + const Register successor() const { return as_Register(encoding() + 1); } // construction inline friend Register as_Register(int encoding); - VMReg as_VMReg(); + VMReg as_VMReg() const; // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } - bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } - bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } const char* name() const; - int encoding_nocheck() const { return (intptr_t)this; } + + // for rvc + int compressed_encoding() const { + assert(is_compressed_valid(), "invalid compressed register"); + return encoding() - compressed_register_base; + } + + int compressed_encoding_nocheck() const { + return 
encoding_nocheck() - compressed_register_base; + } + + bool is_compressed_valid() const { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } // Return the bit which represents this register. This is intended // to be ORed into a bitmask: for usage see class RegSet below. - unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } + uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } }; -// The integer registers of the riscv64 architecture +// The integer registers of the RISCV architecture CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); @@ -133,25 +150,43 @@ class FloatRegisterImpl: public AbstractRegisterImpl { enum { number_of_registers = 32, max_slots_per_register = 2, + + // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. + compressed_register_base = 8, + compressed_register_top = 15, }; // construction inline friend FloatRegister as_FloatRegister(int encoding); - VMReg as_VMReg(); + VMReg as_VMReg() const; // derived registers, offsets, and addresses - FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } - int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } + int encoding_nocheck() const { return (intptr_t)this; } + int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } const char* name() const; + // for rvc + int compressed_encoding() const { + assert(is_compressed_valid(), "invalid compressed register"); + return encoding() - compressed_register_base; + } + + 
int compressed_encoding_nocheck() const { + return encoding_nocheck() - compressed_register_base; + } + + bool is_compressed_valid() const { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } }; -// The float registers of the RISCV64 architecture +// The float registers of the RISCV architecture CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); @@ -196,7 +231,7 @@ inline VectorRegister as_VectorRegister(int encoding) { return (VectorRegister)(intptr_t) encoding; } -// The implementation of vector registers for riscv-v +// The implementation of vector registers for RVV class VectorRegisterImpl: public AbstractRegisterImpl { public: enum { @@ -207,15 +242,15 @@ class VectorRegisterImpl: public AbstractRegisterImpl { // construction inline friend VectorRegister as_VectorRegister(int encoding); - VMReg as_VMReg(); + VMReg as_VMReg() const; // derived registers, offsets, and addresses VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } - int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } const char* name() const; }; @@ -275,22 +310,20 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { // added to make it compile static const int max_gpr; static const int max_fpr; - static const int max_vpr; }; // A set of registers class RegSet { uint32_t _bitset; -public: RegSet(uint32_t bitset) : _bitset(bitset) { } +public: + RegSet() : _bitset(0) { } RegSet(Register r1) : _bitset(r1->bit()) { } - ~RegSet() {} - RegSet operator+(const RegSet 
aSet) const { RegSet result(_bitset | aSet._bitset); return result; @@ -330,13 +363,20 @@ class RegSet { static RegSet range(Register start, Register end) { uint32_t bits = ~0; bits <<= start->encoding(); - bits <<= (31 - end->encoding()); - bits >>= (31 - end->encoding()); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); return RegSet(bits); } uint32_t bits() const { return _bitset; } + +private: + + Register first() { + uint32_t first = _bitset & -_bitset; + return first ? as_Register(exact_log2(first)) : noreg; + } }; #endif // CPU_RISCV_REGISTER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp index 6b90922f8dc..047ea2276ca 100644 --- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -38,10 +38,9 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { int bytes; - switch(type()) { + switch (type()) { case relocInfo::oop_type: { oop_Relocation *reloc = (oop_Relocation *)this; - // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate if (NativeInstruction::is_load_pc_relative_at(addr())) { address constptr = (address)code()->oop_addr_at(reloc->oop_index()); bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp index c5dd75bf682..840ed935d88 100644 --- a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 52d4c23cf33..0a358b7223d 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1,7 +1,7 @@ // // Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,7 @@ // // -// RISCV64 Architecture Description File +// RISCV Architecture Description File //----------REGISTER DEFINITION BLOCK------------------------------------------ // This information is used by the matcher and the register allocator to @@ -68,8 +68,8 @@ register %{ // // follow the C1 compiler in making registers // -// x7, x9-x17, x28-x31 volatile (caller save) -// x0-x4, x8, x27 system (no save, no allocate) +// x7, x9-x17, x27-x31 volatile (caller save) +// x0-x4, x8, x23 system (no save, no allocate) // x5-x6 non-allocatable (so we can use them as temporary regs) // @@ -82,8 +82,8 @@ register %{ reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // lr -reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); +reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra +reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp @@ -154,7 +154,7 @@ reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); // CPU stores such a register pair to memory, the word associated with // the lower ADLC-assigned number must be stored to the lower address. -// RISCV64 has 32 floating-point registers. Each can store a single +// RISCV has 32 floating-point registers. Each can store a single // or double precision floating-point value. 
// for Java use float registers f0-f31 are always save on call whereas @@ -397,24 +397,6 @@ reg_def V31_H ( SOC, SOC, Op_VecX, 31, v31->as_VMReg()->next() ); reg_def V31_J ( SOC, SOC, Op_VecX, 31, v31->as_VMReg()->next(2) ); reg_def V31_K ( SOC, SOC, Op_VecX, 31, v31->as_VMReg()->next(3) ); -// Double Registers - -// The rules of ADL require that double registers be defined in pairs. -// Each pair must be two 32-bit values, but not necessarily a pair of -// single float registers. In each pair, ADLC-assigned register numbers -// must be adjacent, with the lower number even. Finally, when the -// CPU stores such a register pair to memory, the word associated with -// the lower ADLC-assigned number must be stored to the lower address. - -// RISCV64 has 32 floating-point registers. Each can store a vector of -// single or double precision floating-point values up to 8 * 32 -// floats, 4 * 64 bit floats or 2 * 128 bit floats. We currently only -// use the first float or double element of the vector. - -// for Java use float registers v0-v15 are always save on call whereas -// the platform ABI treats v8-v15 as callee save). float registers -// v16-v31 are SOC as per the platform spec - // ---------------------------- // Special Registers // ---------------------------- @@ -467,7 +449,7 @@ alloc_class chunk0( R4, R4_H, // thread R8, R8_H, // fp R0, R0_H, // zero - R1, R1_H, // lr + R1, R1_H, // ra R2, R2_H, // sp R3, R3_H, // gp ); @@ -554,8 +536,8 @@ alloc_class chunk3(RFLAGS); // Several register classes are automatically defined based upon information in // this architecture description. 
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) +// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) +// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // @@ -652,7 +634,7 @@ reg_class any_reg %{ // Class for non-allocatable 32 bit registers reg_class non_allocatable_reg32( R0, // zr - R1, // lr + R1, // ra R2, // sp R3, // gp R4, // tp @@ -662,7 +644,7 @@ reg_class non_allocatable_reg32( // Class for non-allocatable 64 bit registers reg_class non_allocatable_reg( R0, R0_H, // zr - R1, R1_H, // lr + R1, R1_H, // ra R2, R2_H, // sp R3, R3_H, // gp R4, R4_H, // tp @@ -763,7 +745,7 @@ reg_class fp_reg( ); // Class for link register -reg_class lr_reg( +reg_class ra_reg( R1, R1_H ); @@ -846,10 +828,6 @@ reg_class double_reg( F31, F31_H ); -// Class for all 64bit vector registers -reg_class vectord_reg( -); - // Class for all 128bit vector registers reg_class vectorx_reg( V1, V1_H, V1_J, V1_K, @@ -1028,19 +1006,16 @@ class HandlerImpl { } }; -// predicate controlling translation of StoreCM -bool unnecessary_storestore(const Node *storecm); - bool is_CAS(int opcode, bool maybe_volatile); // predicate controlling translation of CompareAndSwapX -bool needs_acquiring_load_exclusive(const Node *load); +bool needs_acquiring_load_reserved(const Node *load); + +// predicate controlling translation of StoreCM +bool unnecessary_storestore(const Node *storecm); // predicate controlling addressing modes bool size_fits_all_mem_uses(AddPNode* addp, int shift); - -// predicate using the temp register for decoding klass -bool maybe_use_tmp_register_decoding_klass(); %} source %{ @@ -1089,34 +1064,13 @@ void reg_mask_init() { } } -// predicate controlling translation of StoreCM -// -// returns 
true if a StoreStore must precede the card write otherwise -// false -bool unnecessary_storestore(const Node *storecm) -{ - assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); - - // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) - // between an object put and the associated card mark when we are using - // CMS without conditional card marking - - if (UseConcMarkSweepGC && !UseCondCardMark) { - return false; - } - - // a storestore is unnecesary in all other cases - - return true; -} - // is_CAS(int opcode, bool maybe_volatile) // // return true if opcode is one of the possible CompareAndSwapX // values otherwise false. bool is_CAS(int opcode, bool maybe_volatile) { - switch(opcode) { + switch (opcode) { // We handle these case Op_CompareAndSwapI: case Op_CompareAndSwapL: @@ -1147,12 +1101,6 @@ bool is_CAS(int opcode, bool maybe_volatile) case Op_WeakCompareAndSwapL: case Op_WeakCompareAndSwapP: case Op_WeakCompareAndSwapN: -#if INCLUDE_SHENANDOAHGC - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahWeakCompareAndSwapN: - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: -#endif return maybe_volatile; default: return false; @@ -1162,12 +1110,9 @@ bool is_CAS(int opcode, bool maybe_volatile) // predicate controlling translation of CAS // // returns true if CAS needs to use an acquiring load otherwise false -bool needs_acquiring_load_exclusive(const Node *n) +bool needs_acquiring_load_reserved(const Node *n) { assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); - if (UseBarriersForVolatile) { - return false; - } LoadStoreNode* ldst = n->as_LoadStore(); if (n != NULL && is_CAS(n->Opcode(), false)) { @@ -1179,11 +1124,28 @@ bool needs_acquiring_load_exclusive(const Node *n) return true; } -bool maybe_use_tmp_register_decoding_klass() { - return !UseCompressedOops && - Universe::narrow_klass_base() != NULL && - 
Universe::narrow_klass_shift() != 0; +// predicate controlling translation of StoreCM +// +// returns true if a StoreStore must precede the card write otherwise +// false + +bool unnecessary_storestore(const Node *storecm) +{ + assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); + + // we need to generate a dmb ishst between an object put and the + // associated card mark when we are using CMS without conditional + // card marking + + if (UseConcMarkSweepGC && !UseCondCardMark) { + return false; + } + + // a storestore is unnecesary in all other cases + + return true; } + #define __ _masm. // advance declarations for helper functions to convert register @@ -1216,25 +1178,49 @@ int MachCallRuntimeNode::ret_addr_offset() { // jal(addr) // or with far branches // jal(trampoline_stub) - // for real runtime callouts it will be 12 instructions - // see riscv64_enc_java_to_runtime + // for real runtime callouts it will be 11 instructions + // see riscv_enc_java_to_runtime // la(t1, retaddr) -> auipc + addi // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi // addi(sp, sp, -2 * wordSize) -> addi - // sd(zr, Address(sp)) -> sd - // sd(t1, Address(sp, wordSize)) -> sd -> sdd in CSky + // sd(t1, Address(sp, wordSize)) -> sd // jalr(t0) -> jalr CodeBlob *cb = CodeCache::find_blob(_entry_point); if (cb != NULL) { return 1 * NativeInstruction::instruction_size; } else { - if (UseCSky) - return 11 * NativeInstruction::instruction_size; - else - return 12 * NativeInstruction::instruction_size; + return 11 * NativeInstruction::instruction_size; } } +// +// Compute padding required for nodes which need alignment +// + +// With RVC a call instruction may get 2-byte aligned. +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. +int CallStaticJavaDirectNode::compute_padding(int current_offset) const +{ + // to make sure the address of jal 4-byte aligned. 
+ return align_up(current_offset, alignment_required()) - current_offset; +} + +// With RVC a call instruction may get 2-byte aligned. +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const +{ + // skip the movptr in MacroAssembler::ic_call(): + // lui + addi + slli + addi + slli + addi + // Though movptr() has already 4-byte aligned with or without RVC, + // We need to prevent from further changes by explicitly calculating the size. + const int movptr_size = 6 * NativeInstruction::instruction_size; + current_offset += movptr_size; + // to make sure the address of jal 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + // Indicate if the safepoint node needs the polling page as an input // the shared code plants the oop data at the start of the generated @@ -1260,6 +1246,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); __ ebreak(); } @@ -1277,13 +1264,14 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. for (int i = 0; i < _count; i++) { __ nop(); } } uint MachNopNode::size(PhaseRegAlloc*) const { - return _count * NativeInstruction::instruction_size; + return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); } //============================================================================= @@ -1324,9 +1312,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print("# stack bang size=%d\n\t", framesize); } - st->print("sd fp, [sp, #%d]", - 2 * wordSize); - st->print("sd lr, [sp, #%d]", - wordSize); - if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } + st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); + st->print("sd ra, [sp, #%d]\n\t", - wordSize); + if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } st->print("sub sp, sp, #%d\n\t", framesize); } #endif @@ -1337,17 +1325,16 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler _masm(&cbuf); // n.b. frame size includes space for return pc and fp - const long framesize = C->frame_size_in_bytes(); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + const int framesize = C->frame_size_in_bytes(); // insert a nop at the start of the prolog so we can patch in a // branch if we need to invalidate the method later - __ nop(); // 4 bytes + __ nop(); assert_cond(C != NULL); int bangsize = C->bang_size_in_bytes(); - if (C->need_stack_bang(bangsize) && UseStackBanging) { + if (C->need_stack_bang(bangsize)) { __ generate_stack_overflow_check(bangsize); } @@ -1391,19 +1378,19 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print("# pop frame %d\n\t", framesize); if (framesize == 0) { - st->print("ld lr, [sp,#%d]\n\t", (2 * wordSize)); + st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); st->print("add sp, sp, #%d\n\t", (2 * wordSize)); } else { st->print("add sp, sp, #%d\n\t", framesize); - st->print("ld lr, [sp,#%d]\n\t", - 2 * wordSize); + st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); st->print("ld fp, 
[sp,#%d]\n\t", - wordSize); } if (do_polling() && C->is_method_compilation()) { st->print("# touch polling page\n\t"); st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); - st->print("ld zr, [t0]"); + st->print("ld zr, [t0]"); } } #endif @@ -1440,6 +1427,9 @@ const Pipeline * MachEpilogNode::pipeline() const { return MachNode::pipeline_class(); } +// This method seems to be obsolete. It is declared in machnode.hpp +// and defined in all *.ad files, but it is never called. Should we +// get rid of it? int MachEpilogNode::safepoint_offset() const { assert(do_polling(), "no return for this epilog node"); return 4; @@ -1534,13 +1524,15 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } } else if (cbuf != NULL) { MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); switch (src_lo_rc) { case rc_int: if (dst_lo_rc == rc_int) { // gpr --> gpr copy - __ mv(as_Register(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - if (!is64 && this->ideal_reg() != Op_RegI) // zero extended for narrow oop or klass - __ clear_upper_bits(as_Register(Matcher::_regEncode[dst_lo]), 32); + if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass + __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); + } else { + __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); + } } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy if (is64) { __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), @@ -1614,11 +1606,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } else { st->print("%s", Matcher::regName[dst_lo]); } - if (bottom_type() == NULL || bottom_type()->isa_vect() != NULL) { - ShouldNotReachHere(); - } else { - st->print("\t# spill size = %d", is64 ? 64 : 32); - } + st->print("\t# spill size = %d", is64 ? 
64 : 32); } return 0; @@ -1690,14 +1678,16 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const assert_cond(st != NULL); st->print_cr("# MachUEPNode"); if (UseCompressedClassPointers) { - st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); if (Universe::narrow_klass_shift() != 0) { st->print_cr("\tdecode_klass_not_null t0, t0"); } } else { - st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); } - st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); + st->print_cr("\tbeq t0, t1, ic_hit"); + st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); + st->print_cr("\tic_hit:"); } #endif @@ -1775,16 +1765,22 @@ const bool Matcher::match_rule_supported(int opcode) { } switch (opcode) { + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + return UseRVB; + case Op_StrCompressedCopy: // fall through case Op_StrInflatedCopy: // fall through return UseRVV; case Op_EncodeISOArray: return UseRVV && SpecialEncodeISOArray; - - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; } return true; // Per default match rules are supported. @@ -1850,12 +1846,7 @@ const bool Matcher::convL2FSupported(void) { // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - int size = MIN2(16, (int)MaxVectorSize); - // Minimum 2 values in vector - if (size < 2 * type2aelembytes(bt)) { size = 0; } - // But never < 4 - if (size < 4) { size = 0; } - return size; + return 0; } // Limits on vector size (number of elements) loaded into vector. 
@@ -1863,10 +1854,7 @@ const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt) / type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { - // For the moment limit the vector size to 8 bytes - int size = 8 / type2aelembytes(bt); - if (size < 2) { size = 2; } - return size; + return max_vector_size(bt); } // Vector ideal reg. @@ -1885,8 +1873,8 @@ const bool Matcher::pass_original_key_for_aes() { return false; } +// RISC-V supports misaligned vectors store/load. const bool Matcher::misaligned_vectors_ok() { - ShouldNotReachHere(); return true; } @@ -1940,7 +1928,7 @@ bool Matcher::is_generic_vector(MachOper* opnd) { // Implicit_null_check optimization moves the Decode along with the // memory operation back up before the NullCheck. bool Matcher::narrow_oop_use_complex_address() { - return false; + return Universe::narrow_oop_shift() == 0; } bool Matcher::narrow_klass_use_complex_address() { @@ -2113,14 +2101,15 @@ void Compile::reshape_address(AddPNode* addp) { encode %{ // BEGIN Non-volatile memory access - enc_class riscv64_enc_li_imm(iRegIorL dst, immIorL src) %{ + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); int64_t con = (int64_t)$src$$constant; Register dst_reg = as_Register($dst$$reg); __ li(dst_reg, con); %} - enc_class riscv64_enc_mov_p(iRegP dst, immP src) %{ + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); address con = (address)$src$$constant; @@ -2139,13 +2128,14 @@ encode %{ } %} - enc_class riscv64_enc_mov_p1(iRegP dst) %{ + enc_class riscv_enc_mov_p1(iRegP dst) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); Register dst_reg = as_Register($dst$$reg); __ li(dst_reg, 1); %} - enc_class riscv64_enc_mov_poll_page(iRegP dst, immPollPage src) %{ + enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ 
MacroAssembler _masm(&cbuf); int32_t offset = 0; address page = (address)$src$$constant; @@ -2156,12 +2146,12 @@ encode %{ __ addi(dst_reg, dst_reg, offset); %} - enc_class riscv64_enc_mov_byte_map_base(iRegP dst) %{ + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ MacroAssembler _masm(&cbuf); __ load_byte_map_base($dst$$Register); %} - enc_class riscv64_enc_mov_n(iRegN dst, immN src) %{ + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); address con = (address)$src$$constant; @@ -2174,13 +2164,13 @@ encode %{ } %} - enc_class riscv64_enc_mov_zero(iRegNorP dst) %{ + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); __ mv(dst_reg, zr); %} - enc_class riscv64_enc_mov_nk(iRegN dst, immNKlass src) %{ + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); address con = (address)$src$$constant; @@ -2193,42 +2183,42 @@ encode %{ } %} - enc_class riscv64_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ 
+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, @@ -2237,16 +2227,16 @@ encode %{ // compare and branch instruction encodings - enc_class riscv64_enc_j(label lbl) %{ + enc_class riscv_enc_j(label lbl) %{ MacroAssembler _masm(&cbuf); Label* L = $lbl$$label; __ j(*L); %} - enc_class riscv64_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ MacroAssembler _masm(&cbuf); 
Label* L = $lbl$$label; - switch($cmp$$cmpcode) { + switch ($cmp$$cmpcode) { case(BoolTest::ge): __ j(*L); break; @@ -2259,7 +2249,7 @@ encode %{ // call instruction encodings - enc_class riscv64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ + enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ Register sub_reg = as_Register($sub$$reg); Register super_reg = as_Register($super$$reg); Register temp_reg = as_Register($temp$$reg); @@ -2286,7 +2276,7 @@ encode %{ __ bind(done); %} - enc_class riscv64_enc_java_static_call(method meth) %{ + enc_class riscv_enc_java_static_call(method meth) %{ MacroAssembler _masm(&cbuf); address addr = (address)$meth$$method; @@ -2318,7 +2308,7 @@ encode %{ } %} - enc_class riscv64_enc_java_dynamic_call(method meth) %{ + enc_class riscv_enc_java_dynamic_call(method meth) %{ MacroAssembler _masm(&cbuf); int method_index = resolved_method_index(cbuf); address call = __ ic_call((address)$meth$$method, method_index); @@ -2328,7 +2318,7 @@ encode %{ } %} - enc_class riscv64_enc_call_epilog() %{ + enc_class riscv_enc_call_epilog() %{ MacroAssembler _masm(&cbuf); if (VerifyStackAtCalls) { // Check that stack depth is unchanged: find majik cookie on stack @@ -2336,7 +2326,7 @@ encode %{ } %} - enc_class riscv64_enc_java_to_runtime(method meth) %{ + enc_class riscv_enc_java_to_runtime(method meth) %{ MacroAssembler _masm(&cbuf); // some calls to generated routines (arraycopy code) are scheduled @@ -2357,7 +2347,6 @@ encode %{ __ la(t0, RuntimeAddress(entry)); // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() __ addi(sp, sp, -2 * wordSize); - __ sd(zr, Address(sp)); __ sd(t1, Address(sp, wordSize)); __ jalr(t0); __ bind(retaddr); @@ -2366,29 +2355,28 @@ encode %{ %} // using the cr register as the bool result: 0 for success; others failed. 
- enc_class riscv64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ + enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ MacroAssembler _masm(&cbuf); Register flag = t1; Register oop = as_Register($object$$reg); Register box = as_Register($box$$reg); - Register disp_hdr = as_Register($tmp$$reg); + Register disp_hdr = as_Register($tmp1$$reg); Register tmp = as_Register($tmp2$$reg); Label cont; Label object_has_monitor; assert_different_registers(oop, box, tmp, disp_hdr, t0); - // Load markOop from object into displaced_header. + // Load markWord from object into displaced_header. __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); // Always do locking in runtime. if (EmitSync & 0x01) { - __ li(flag, 1); + __ mv(flag, 1); return; } if (UseBiasedLocking && !UseOptoBiasInlining) { - // ignore slow case here __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); } @@ -2398,15 +2386,15 @@ encode %{ __ bnez(t0, object_has_monitor); } - // Set tmp to be (markOop of object | UNLOCK_VALUE). + // Set tmp to be (markWord of object | UNLOCK_VALUE). __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); // Initialize the box. (Must happen before we update the object mark!) __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - // Compare object markOop with an unlocked value (tmp) and if - // equal exchange the stack address of our box with object markOop. - // On failure disp_hdr contains the possibly locked markOop. + // Compare object markWord with an unlocked value (tmp) and if + // equal exchange the stack address of our box with object markWord. + // On failure disp_hdr contains the possibly locked markWord. 
__ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, Assembler::rl, /*result*/disp_hdr); __ mv(flag, zr); @@ -2419,7 +2407,7 @@ encode %{ // We did not see an unlocked object so try the fast recursive case. // Check if the owner is self by comparing the value in the - // markOop of object (disp_hdr) with the stack pointer. + // markWord of object (disp_hdr) with the stack pointer. __ sub(disp_hdr, disp_hdr, sp); __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, @@ -2440,7 +2428,7 @@ encode %{ // Try to CAS m->owner from NULL to current thread. __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) // Store a non-null value into the box to avoid looking like a re-entrant // lock. The fast-path monitor unlock code checks for @@ -2454,12 +2442,12 @@ encode %{ %} // using cr flag to indicate the fast_unlock result: 0 for success; others failed. - enc_class riscv64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ + enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ MacroAssembler _masm(&cbuf); Register flag = t1; Register oop = as_Register($object$$reg); Register box = as_Register($box$$reg); - Register disp_hdr = as_Register($tmp$$reg); + Register disp_hdr = as_Register($tmp1$$reg); Register tmp = as_Register($tmp2$$reg); Label cont; Label object_has_monitor; @@ -2468,7 +2456,7 @@ encode %{ // Always do locking in runtime. 
if (EmitSync & 0x01) { - __ li(flag, 1); + __ mv(flag, 1); return; } @@ -2491,7 +2479,7 @@ encode %{ } // Check if it is still a light weight lock, this is true if we - // see the stack address of the basicLock in the markOop of the + // see the stack address of the basicLock in the markWord of the // object. __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, @@ -2504,7 +2492,8 @@ encode %{ // Handle existing monitor. if ((EmitSync & 0x02) == 0) { __ bind(object_has_monitor); - __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor + STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); + __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. @@ -2526,7 +2515,7 @@ encode %{ // arithmetic encodings - enc_class riscv64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2534,7 +2523,7 @@ encode %{ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); %} - enc_class riscv64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2542,7 +2531,7 @@ encode %{ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); %} - enc_class riscv64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2550,7 +2539,7 @@ encode %{ __ corrected_idivl(dst_reg, src1_reg, src2_reg, 
true); %} - enc_class riscv64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2558,29 +2547,32 @@ encode %{ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); %} - enc_class riscv64_enc_tail_call(iRegP jump_target) %{ + enc_class riscv_enc_tail_call(iRegP jump_target) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); Register target_reg = as_Register($jump_target$$reg); __ jr(target_reg); %} - enc_class riscv64_enc_tail_jmp(iRegP jump_target) %{ + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); Register target_reg = as_Register($jump_target$$reg); // exception oop should be in x10 - // ret addr has been popped into lr + // ret addr has been popped into ra // callee expects it in x13 - __ mv(x13, lr); + __ mv(x13, ra); __ jr(target_reg); %} - enc_class riscv64_enc_rethrow() %{ + enc_class riscv_enc_rethrow() %{ MacroAssembler _masm(&cbuf); __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); %} - enc_class riscv64_enc_ret() %{ + enc_class riscv_enc_ret() %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); __ ret(); %} @@ -2637,7 +2629,7 @@ encode %{ // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is // even aligned with pad0 as needed. // Region 6 is even aligned. Region 6-7 is NOT even aligned; -// (the latter is true on Intel but is it false on RISCV64?) +// (the latter is true on Intel but is it false on RISCV?) // region 6-11 is even aligned; it may be padded out more so that // the region from SP to FP meets the minimum stack alignment. 
// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack @@ -2678,8 +2670,8 @@ frame %{ // Number of stack slots between incoming argument block and the start of // a new frame. The PROLOG must add this many slots to the stack. The - // EPILOG must remove this many slots. - // Riscv64 needs two words for LR (return address) and FP (frame pointer). + // EPILOG must remove this many slots. RISC-V needs two slots for + // return address and fp. in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); // Number of outgoing stack slots killed above the out_preserve_stack_slots @@ -2839,6 +2831,24 @@ operand immI_le_4() interface(CONST_INTER); %} +operand immI_16() +%{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() +%{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_31() %{ predicate(n->get_int() == 31); @@ -2981,6 +2991,16 @@ operand immByteMapBase() interface(CONST_INTER); %} +// Int Immediate: low 16-bit mask +operand immI_16bits() +%{ + predicate(n->get_int() == 0xFFFF); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Long Immediate: low 32-bit mask operand immL_32bits() %{ @@ -3126,6 +3146,17 @@ operand immLOffset() interface(CONST_INTER); %} +// Scale values +operand immIScale() +%{ + predicate(1 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Integer 32 bit Register Operands operand iRegI() %{ @@ -3253,7 +3284,6 @@ operand iRegP() match(RegP); match(iRegPNoSp); match(iRegP_R10); - match(iRegP_R15); match(javaThread_RegP); op_cost(0); format %{ %} @@ -3586,7 +3616,7 @@ operand indOffLN(iRegN reg, immLOffset off) %} %} -// Riscv64 opto stubs need to write to the pc slot in the thread anchor +// RISCV opto stubs need to write to the pc slot in the thread anchor operand 
thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) %{ constraint(ALLOC_IN_RC(ptr_reg)); @@ -4568,6 +4598,7 @@ instruct loadI(iRegINoSp dst, memory mem) format %{ "lw $dst, $mem\t# int, #@loadI" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4583,6 +4614,7 @@ instruct loadI2L(iRegLNoSp dst, memory mem) format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4613,6 +4645,7 @@ instruct loadL(iRegLNoSp dst, memory mem) format %{ "ld $dst, $mem\t# int, #@loadL" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4643,6 +4676,7 @@ instruct loadP(iRegPNoSp dst, memory mem) format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4673,6 +4707,7 @@ instruct loadKlass(iRegPNoSp dst, memory mem) format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4718,6 +4753,7 @@ instruct loadD(fRegD dst, memory mem) format %{ "fld $dst, $mem\t# double, #@loadD" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4732,7 +4768,7 @@ instruct loadConI(iRegINoSp dst, immI src) ins_cost(ALU_COST); format %{ "li $dst, $src\t# int, #@loadConI" %} - ins_encode(riscv64_enc_li_imm(dst, src)); + ins_encode(riscv_enc_li_imm(dst, src)); ins_pipe(ialu_imm); %} @@ -4745,7 +4781,7 @@ instruct loadConL(iRegLNoSp dst, immL src) ins_cost(ALU_COST); format %{ "li $dst, $src\t# long, #@loadConL" %} - 
ins_encode(riscv64_enc_li_imm(dst, src)); + ins_encode(riscv_enc_li_imm(dst, src)); ins_pipe(ialu_imm); %} @@ -4758,7 +4794,7 @@ instruct loadConP(iRegPNoSp dst, immP con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# ptr, #@loadConP" %} - ins_encode(riscv64_enc_mov_p(dst, con)); + ins_encode(riscv_enc_mov_p(dst, con)); ins_pipe(ialu_imm); %} @@ -4771,7 +4807,7 @@ instruct loadConP0(iRegPNoSp dst, immP0 con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} - ins_encode(riscv64_enc_mov_zero(dst)); + ins_encode(riscv_enc_mov_zero(dst)); ins_pipe(ialu_imm); %} @@ -4784,7 +4820,7 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} - ins_encode(riscv64_enc_mov_p1(dst)); + ins_encode(riscv_enc_mov_p1(dst)); ins_pipe(ialu_imm); %} @@ -4797,7 +4833,7 @@ instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ins_cost(ALU_COST * 6); format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} - ins_encode(riscv64_enc_mov_poll_page(dst, con)); + ins_encode(riscv_enc_mov_poll_page(dst, con)); ins_pipe(ialu_imm); %} @@ -4809,7 +4845,7 @@ instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} - ins_encode(riscv64_enc_mov_byte_map_base(dst)); + ins_encode(riscv_enc_mov_byte_map_base(dst)); ins_pipe(ialu_imm); %} @@ -4822,7 +4858,7 @@ instruct loadConN(iRegNNoSp dst, immN con) ins_cost(ALU_COST * 4); format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} - ins_encode(riscv64_enc_mov_n(dst, con)); + ins_encode(riscv_enc_mov_n(dst, con)); ins_pipe(ialu_imm); %} @@ -4835,7 +4871,7 @@ instruct loadConN0(iRegNNoSp dst, immN0 con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} - ins_encode(riscv64_enc_mov_zero(dst)); + ins_encode(riscv_enc_mov_zero(dst)); ins_pipe(ialu_imm); %} @@ -4848,7 +4884,7 @@ instruct loadConNKlass(iRegNNoSp dst, 
immNKlass con) ins_cost(ALU_COST * 6); format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} - ins_encode(riscv64_enc_mov_nk(dst, con)); + ins_encode(riscv_enc_mov_nk(dst, con)); ins_pipe(ialu_imm); %} @@ -5016,6 +5052,7 @@ instruct storeI(iRegIorL2I src, memory mem) format %{ "sw $src, $mem\t# int, #@storeI" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5045,6 +5082,7 @@ instruct storeL(iRegL src, memory mem) format %{ "sd $src, $mem\t# long, #@storeL" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5075,6 +5113,7 @@ instruct storeP(iRegP src, memory mem) format %{ "sd $src, $mem\t# ptr, #@storeP" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5105,6 +5144,7 @@ instruct storeN(iRegN src, memory mem) format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5115,7 +5155,7 @@ instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) %{ match(Set mem (StoreN mem zero)); predicate(Universe::narrow_oop_base() == NULL && - Universe::narrow_klass_base() == NULL); + Universe::narrow_klass_base() == NULL); ins_cost(STORE_COST); format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} @@ -5151,6 +5191,7 @@ instruct storeD(fRegD src, memory mem) format %{ "fsd $src, $mem\t# double, #@storeD" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5166,6 +5207,7 @@ instruct storeNKlass(iRegN src, memory mem) format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ins_encode %{ + 
Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5189,7 +5231,7 @@ instruct storeNKlass(iRegN src, memory mem) // only for 64-bit. // // We implement LoadPLocked and storePConditional instructions using, -// respectively the RISCV64 hw load-reserve and store-conditional +// respectively the RISCV hw load-reserve and store-conditional // instructions. Whereas we must implement each of // Store{IL}Conditional using a CAS which employs a pair of // instructions comprising a load-reserve followed by a @@ -5237,7 +5279,9 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla %} // storeLConditional is used by PhaseMacroExpand::expand_lock_node -// when attempting to rebias a lock towards the current thread. +// when attempting to rebias a lock towards the current thread. We +// must use the acquire form of cmpxchg in order to guarantee acquire +// semantics in this case. instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ match(Set cr (StoreLConditional mem (Binary oldval newval))); @@ -5282,13 +5326,14 @@ instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFl // standard CompareAndSwapX when we are using barriers // these have higher priority than the rules selected by a predicate -instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); - effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ 
"cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5304,13 +5349,14 @@ instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1 ins_pipe(pipe_slow); %} -instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); - effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5337,7 +5383,7 @@ instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoS "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" %} - ins_encode(riscv64_enc_cmpxchgw(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5353,7 +5399,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" %} - ins_encode(riscv64_enc_cmpxchg(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5369,7 +5415,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapP" %} - ins_encode(riscv64_enc_cmpxchg(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5385,7 +5431,7 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoS "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" %} - ins_encode(riscv64_enc_cmpxchgn(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5394,7 +5440,7 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoS instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapB mem (Binary oldval newval))); @@ -5419,7 +5465,7 @@ instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapS mem (Binary oldval newval))); @@ -5443,7 +5489,7 @@ instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapI mem (Binary oldval newval))); @@ -5454,14 +5500,14 @@ instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegI "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapIAcq" %} - ins_encode(riscv64_enc_cmpxchgw_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapL mem (Binary oldval newval))); @@ -5472,14 +5518,14 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" %} - ins_encode(riscv64_enc_cmpxchg_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapP mem (Binary oldval newval))); @@ -5490,14 +5536,14 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" %} - ins_encode(riscv64_enc_cmpxchg_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapN mem (Binary oldval newval))); @@ -5508,7 +5554,7 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapNAcq" %} - ins_encode(riscv64_enc_cmpxchgn_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5646,7 +5692,7 @@ instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP ne instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeB mem (Binary oldval newval))); @@ -5670,7 +5716,7 @@ instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeS mem (Binary oldval newval))); @@ -5693,7 +5739,7 @@ instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeI mem (Binary oldval newval))); @@ -5715,7 +5761,7 @@ instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeL mem (Binary oldval newval))); @@ -5737,7 +5783,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + 
predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); @@ -5759,7 +5805,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeP mem (Binary oldval newval))); @@ -5910,7 +5956,7 @@ instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); @@ -5936,7 +5982,7 @@ instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); @@ -5961,7 +6007,7 @@ instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); @@ -5983,7 +6029,7 @@ instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapL mem (Binary oldval 
newval))); @@ -6005,7 +6051,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); @@ -6027,7 +6073,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); @@ -6109,7 +6155,7 @@ instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetI mem newv)); @@ -6126,7 +6172,7 @@ instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetL mem newv)); @@ -6143,7 +6189,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetN mem newv)); @@ -6160,7 +6206,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetP mem newv)); @@ -6305,7 +6351,7 @@ instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) instruct 
get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddL mem incr)); @@ -6321,7 +6367,7 @@ instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %} instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddL mem incr)); @@ -6338,7 +6384,7 @@ instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddL mem incr)); @@ -6355,7 +6401,7 @@ instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddL mem incr)); @@ -6372,7 +6418,7 @@ instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddI mem incr)); @@ -6389,7 +6435,7 @@ instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddI mem incr)); @@ -6406,7 
+6452,7 @@ instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddI mem incr)); @@ -6423,7 +6469,7 @@ instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddI mem incr)); @@ -6456,6 +6502,7 @@ instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6471,6 +6518,7 @@ instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); int32_t con = (int32_t)$src2$$constant; __ addiw(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6487,6 +6535,7 @@ instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ addiw(as_Register($dst$$reg), as_Register($src1$$reg), $src2$$constant); @@ -6503,6 +6552,7 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6512,12 +6562,13 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ %} // If we shift more than 32 bits, we need not 
convert I2L. -instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegIorL2I src, uimmI6_ge32 scale) %{ +instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ match(Set dst (LShiftL (ConvI2L src) scale)); ins_cost(ALU_COST); format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); %} @@ -6533,6 +6584,7 @@ instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addi __ add(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6549,6 +6601,7 @@ instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6564,6 +6617,7 @@ instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addi __ add(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6581,6 +6635,7 @@ instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ subw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6597,6 +6652,7 @@ instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addiw __ subw(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6613,6 +6669,7 @@ instruct subL_reg_reg(iRegLNoSp dst, 
iRegL src1, iRegL src2) %{ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6628,6 +6685,7 @@ instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addi __ sub(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6687,21 +6745,6 @@ instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ins_pipe(imul_reg_reg); %} -instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ - match(Set dst (MulL (ConvI2L src1) (ConvI2L src2))); - ins_cost(IMUL_COST); - format %{ "mul $dst, $src1, $src2\t#@smulI" %} - - // Signed Multiply Long multiplies two 32-bit signed values to produce a 64-bit result. - ins_encode %{ - __ mul(as_Register($dst$$reg), - as_Register($src1$$reg), - as_Register($src2$$reg)); - %} - - ins_pipe(imul_reg_reg); -%} - // Long Multiply instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ @@ -6740,7 +6783,7 @@ instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ins_cost(IDIVSI_COST); format %{ "divw $dst, $src1, $src2\t#@divI"%} - ins_encode(riscv64_enc_divw(dst, src1, src2)); + ins_encode(riscv_enc_divw(dst, src1, src2)); ins_pipe(idiv_reg_reg); %} @@ -6762,7 +6805,7 @@ instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(IDIVDI_COST); format %{ "div $dst, $src1, $src2\t#@divL" %} - ins_encode(riscv64_enc_div(dst, src1, src2)); + ins_encode(riscv_enc_div(dst, src1, src2)); ins_pipe(ldiv_reg_reg); %} @@ -6772,6 +6815,7 @@ instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); %} 
ins_pipe(ialu_reg_shift); @@ -6784,7 +6828,7 @@ instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ins_cost(IDIVSI_COST); format %{ "remw $dst, $src1, $src2\t#@modI" %} - ins_encode(riscv64_enc_modw(dst, src1, src2)); + ins_encode(riscv_enc_modw(dst, src1, src2)); ins_pipe(ialu_reg_reg); %} @@ -6795,13 +6839,14 @@ instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(IDIVDI_COST); format %{ "rem $dst, $src1, $src2\t#@modL" %} - ins_encode(riscv64_enc_mod(dst, src1, src2)); + ins_encode(riscv_enc_mod(dst, src1, src2)); ins_pipe(ialu_reg_reg); %} // Integer Shifts // Shift Left Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (LShiftI src1 src2)); ins_cost(ALU_COST); @@ -6834,6 +6879,7 @@ instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ %} // Shift Right Logical Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (URShiftI src1 src2)); ins_cost(ALU_COST); @@ -6866,6 +6912,7 @@ instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ %} // Shift Right Arithmetic Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (RShiftI src1 src2)); ins_cost(ALU_COST); @@ -6900,6 +6947,7 @@ instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ // Long Shifts // Shift Left Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (LShiftL src1 src2)); @@ -6923,6 +6971,7 @@ instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ins_encode %{ 
+ Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ slli(as_Register($dst$$reg), @@ -6934,6 +6983,7 @@ instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ %} // Shift Right Logical Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (URShiftL src1 src2)); @@ -6957,6 +7007,7 @@ instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ srli(as_Register($dst$$reg), @@ -6975,6 +7026,7 @@ instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ srli(as_Register($dst$$reg), @@ -6986,6 +7038,7 @@ instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ %} // Shift Right Arithmetic Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (RShiftL src1 src2)); @@ -7009,6 +7062,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ srai(as_Register($dst$$reg), @@ -7019,8 +7073,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ins_pipe(ialu_reg_shift); %} -instruct regI_not_reg(iRegINoSp dst, - iRegIorL2I src1, immI_M1 m1) %{ +instruct 
regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ match(Set dst (XorI src1 m1)); ins_cost(ALU_COST); format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} @@ -7032,8 +7085,7 @@ instruct regI_not_reg(iRegINoSp dst, ins_pipe(ialu_reg); %} -instruct regL_not_reg(iRegLNoSp dst, - iRegL src1, immL_M1 m1) %{ +instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ match(Set dst (XorL src1 m1)); ins_cost(ALU_COST); format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} @@ -7409,6 +7461,44 @@ instruct negD_reg_reg(fRegD dst, fRegD src) %{ ins_pipe(fp_uop_d); %} +instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 3); + format %{ + "sraiw t0, $src, 0x1f\n\t" + "addw $dst, $src, t0\n\t" + "xorr $dst, $dst, t0\t#@absI_reg" + %} + + ins_encode %{ + __ sraiw(t0, as_Register($src$$reg), 0x1f); + __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); + __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct absL_reg(iRegLNoSp dst, iRegL src) %{ + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 3); + format %{ + "srai t0, $src, 0x3f\n\t" + "add $dst, $src, t0\n\t" + "xorr $dst, $dst, t0\t#@absL_reg" + %} + + ins_encode %{ + __ srai(t0, as_Register($src$$reg), 0x3f); + __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); + __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + instruct absF_reg(fRegF dst, fRegF src) %{ match(Set dst (AbsF src)); @@ -7436,7 +7526,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ %} instruct sqrtF_reg(fRegF dst, fRegF src) %{ - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + match(Set dst (SqrtF src)); ins_cost(FSQRT_COST); format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} @@ -7467,13 +7557,14 @@ instruct sqrtD_reg(fRegD dst, fRegD src) %{ // Logical Instructions // Register And -instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ +instruct 
andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ match(Set dst (AndI src1 src2)); format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7483,13 +7574,14 @@ instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ %} // Immediate And -instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ +instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ match(Set dst (AndI src1 src2)); format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andi(as_Register($dst$$reg), as_Register($src1$$reg), (int32_t)($src2$$constant)); @@ -7499,13 +7591,14 @@ instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ %} // Register Or -instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ +instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ match(Set dst (OrI src1 src2)); format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ orr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7515,7 +7608,7 @@ instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ %} // Immediate Or -instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ +instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ match(Set dst (OrI src1 src2)); format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} @@ -7531,13 +7624,14 @@ instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ %} // Register Xor -instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ +instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ match(Set dst (XorI src1 src2)); format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} 
ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ xorr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7547,7 +7641,7 @@ instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ %} // Immediate Xor -instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ +instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ match(Set dst (XorI src1 src2)); format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} @@ -7570,6 +7664,7 @@ instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7586,6 +7681,7 @@ instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andi(as_Register($dst$$reg), as_Register($src1$$reg), (int32_t)($src2$$constant)); @@ -7602,6 +7698,7 @@ instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ orr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7634,6 +7731,7 @@ instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ xorr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7661,29 +7759,29 @@ instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ // ============================================================================ // BSWAP Instructions -instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ +instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ match(Set dst (ReverseBytesI src)); effect(TEMP cr); - ins_cost(ALU_COST * 17); - format %{ "grevw $dst, $src\t#@bytes_reverse_int" %} + ins_cost(ALU_COST * 
13); + format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ins_encode %{ - __ grevw(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); %} -instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ +instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ match(Set dst (ReverseBytesL src)); effect(TEMP cr); - ins_cost(ALU_COST * 45); - format %{ "grev $dst, $src\t#@bytes_reverse_long" %} + ins_cost(ALU_COST * 29); + format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ins_encode %{ - __ grev(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); @@ -7693,10 +7791,10 @@ instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (ReverseBytesUS src)); ins_cost(ALU_COST * 5); - format %{ "grevhu $dst, $src\t#@bytes_reverse_unsigned_short" %} + format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ins_encode %{ - __ grevhu(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); @@ -7706,10 +7804,10 @@ instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (ReverseBytesS src)); ins_cost(ALU_COST * 5); - format %{ "grevh $dst, $src\t#@bytes_reverse_short" %} + format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ins_encode %{ - __ grevh(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); @@ -7834,6 +7932,7 @@ instruct castX2P(iRegPNoSp dst, iRegL src) %{ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); if ($dst$$reg != $src$$reg) { __ mv(as_Register($dst$$reg), as_Register($src$$reg)); } @@ -7849,6 +7948,7 @@ instruct castP2X(iRegLNoSp dst, iRegP src) %{ format %{ "mv $dst, $src\t# ptr -> 
long, #@castP2X" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); if ($dst$$reg != $src$$reg) { __ mv(as_Register($dst$$reg), as_Register($src$$reg)); } @@ -7894,7 +7994,7 @@ instruct checkCastPP(iRegPNoSp dst) // Convert Instructions // int to bool -instruct convI2Bool(iRegINoSp dst, iRegIorL2I src) +instruct convI2Bool(iRegINoSp dst, iRegI src) %{ match(Set dst (Conv2B src)); @@ -7930,7 +8030,7 @@ instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) match(Set dst (ConvI2L src)); ins_cost(ALU_COST); - format %{ "addw $dst, $src\t#@convI2L_reg_reg" %} + format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ins_encode %{ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); %} @@ -7950,18 +8050,17 @@ instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ins_pipe(ialu_reg); %} -// unsigned int to long (Zero-extend) -// this pattern occurs in bigmath arithmetic -instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) +// int to unsigned long (Zero-extend) +instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L src) mask)); ins_cost(ALU_COST * 2); - format %{ "slli $dst, $src, 32\t# ui2l\n\t" - "srli $dst, $dst, 32\t# ui2l, #@convUI2L_reg_reg" %} + format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ins_encode %{ - __ zero_ext(as_Register($dst$$reg), as_Register($src$$reg), 32); + Assembler::CompressibleRegion cr(&_masm); + __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); %} ins_pipe(ialu_reg_shift); @@ -8111,15 +8210,12 @@ instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ instruct convP2I(iRegINoSp dst, iRegP src) %{ match(Set dst (ConvL2I (CastP2X src))); - ins_cost(ALU_COST); - format %{ "mv $dst, $src\t# ptr -> int\n\t" - "slli $dst, $dst, 32\n\t" - "srli $dst, $dst, 32\t#@convP2I" - %} + ins_cost(ALU_COST * 2); + format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ins_encode %{ - __ mv($dst$$Register, $src$$Register); - __ 
clear_upper_bits($dst$$Register, 32); + Assembler::CompressibleRegion cr(&_masm); + __ zero_extend($dst$$Register, $src$$Register, 32); %} ins_pipe(ialu_reg); @@ -8136,6 +8232,7 @@ instruct convN2I(iRegINoSp dst, iRegN src) format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ mv($dst$$Register, $src$$Register); %} @@ -8201,26 +8298,7 @@ instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ins_pipe(ialu_reg); %} -instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ - predicate(!maybe_use_tmp_register_decoding_klass()); - - match(Set dst (DecodeNKlass src)); - - ins_cost(ALU_COST); - format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} - - ins_encode %{ - Register src_reg = as_Register($src$$reg); - Register dst_reg = as_Register($dst$$reg); - __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? xheapbase : t0); - %} - - ins_pipe(ialu_reg); -%} - -instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ - predicate(maybe_use_tmp_register_decoding_klass()); - +instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ match(Set dst (DecodeNKlass src)); effect(TEMP tmp); @@ -8251,6 +8329,7 @@ instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); %} @@ -8287,6 +8366,7 @@ instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); %} @@ -8305,6 +8385,7 @@ instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); %} @@ 
-8341,6 +8422,7 @@ instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); %} @@ -8359,6 +8441,7 @@ instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); %} @@ -8377,6 +8460,7 @@ instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); %} @@ -8515,7 +8599,7 @@ instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) ins_pipe(pipe_class_default); %} -instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q) +instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) %{ match(Set dst (CmpLTMask p q)); @@ -8623,7 +8707,7 @@ instruct branch(label lbl) ins_cost(BRANCH_COST); format %{ "j $lbl\t#@branch" %} - ins_encode(riscv64_enc_j(lbl)); + ins_encode(riscv_enc_j(lbl)); ins_pipe(pipe_branch); %} @@ -9573,7 +9657,7 @@ instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, lab format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9588,7 +9672,7 @@ instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9672,7 +9756,7 @@ instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, la format %{ 
"j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9687,7 +9771,7 @@ instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, labe format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9807,7 +9891,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" "mv $dst, $src\n\t" "skip:" - %} + %} ins_encode %{ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, @@ -9898,7 +9982,8 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) // Procedure Call/Return Instructions // Call Java Static Instruction - +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); @@ -9909,15 +9994,18 @@ instruct CallStaticJavaDirect(method meth) format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} - ins_encode( riscv64_enc_java_static_call(meth), - riscv64_enc_call_epilog ); + ins_encode(riscv_enc_java_static_call(meth), + riscv_enc_call_epilog); ins_pipe(pipe_class_call); + ins_alignment(4); %} // TO HERE // Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) %{ match(CallDynamicJava); @@ -9928,10 +10016,11 @@ instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} - ins_encode( riscv64_enc_java_dynamic_call(meth), - riscv64_enc_call_epilog ); + ins_encode(riscv_enc_java_dynamic_call(meth), + riscv_enc_call_epilog); ins_pipe(pipe_class_call); + ins_alignment(4); %} // Call Runtime Instruction @@ -9946,7 +10035,7 @@ instruct CallRuntimeDirect(method meth, rFlagsReg cr) format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} - ins_encode( riscv64_enc_java_to_runtime(meth) ); + ins_encode(riscv_enc_java_to_runtime(meth)); ins_pipe(pipe_class_call); %} @@ -9963,7 +10052,7 @@ instruct CallLeafDirect(method meth, rFlagsReg cr) format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} - ins_encode( riscv64_enc_java_to_runtime(meth) ); + ins_encode(riscv_enc_java_to_runtime(meth)); ins_pipe(pipe_class_call); %} @@ -9980,7 +10069,7 @@ instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} - ins_encode( riscv64_enc_java_to_runtime(meth) ); + ins_encode(riscv_enc_java_to_runtime(meth)); ins_pipe(pipe_class_call); %} @@ -9993,31 +10082,31 @@ instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) // gen_subtype_check()). Return zero for a hit. The encoding // ALSO sets flags. 
-instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) +instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) %{ match(Set result (PartialSubtypeCheck sub super)); - effect(KILL temp, KILL cr); + effect(KILL tmp, KILL cr); ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} - ins_encode(riscv64_enc_partial_subtype_check(sub, super, temp, result)); + ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); opcode(0x1); // Force zero of result reg on hit ins_pipe(pipe_class_memory); %} -instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, - immP0 zero, rFlagsReg cr) +instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, + immP0 zero, rFlagsReg cr) %{ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); - effect(KILL temp, KILL result); + effect(KILL tmp, KILL result); ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} - ins_encode(riscv64_enc_partial_subtype_check(sub, super, temp, result)); + ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); opcode(0x0); // Don't zero result reg on hit @@ -10028,7 +10117,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{ "String Compare $str1, $cnt1, 
$str2, $cnt2 -> $result\t#@string_compareU" %} @@ -10046,7 +10135,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} @@ -10063,7 +10152,7 @@ instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} @@ -10081,7 +10170,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} @@ -10095,15 +10184,15 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp 
tmp3, - iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ins_encode %{ __ string_indexof($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, @@ -10116,15 +10205,15 @@ instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, - iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ins_encode %{ __ string_indexof($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, @@ -10137,13 +10226,13 @@ instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 
cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, - iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ins_encode %{ @@ -10158,13 +10247,14 @@ instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, - immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) + immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ins_encode %{ @@ -10179,15 +10269,15 @@ instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, %} instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, - immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, iRegINoSp tmp4, 
rFlagsReg tmp) + immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ins_encode %{ int icnt2 = (int)$int_cnt2$$constant; __ string_indexof_linearscan($str1$$Register, $str2$$Register, @@ -10200,15 +10290,15 @@ instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, %} instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, - immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) + immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ins_encode %{ int icnt2 = (int)$int_cnt2$$constant; __ string_indexof_linearscan($str1$$Register, $str2$$Register, @@ -10220,8 +10310,26 @@ instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ins_pipe(pipe_class_memory); %} +instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, 
iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} + ins_pipe(pipe_class_memory); +%} + + // clearing of an array -instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) %{ match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base); @@ -10242,8 +10350,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlag instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) %{ - predicate((uint64_t)n->in(2)->get_long() - < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); match(Set dummy (ClearArray cnt base)); effect(USE_KILL base, KILL cr); @@ -10291,34 +10398,34 @@ instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) %{ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); - effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // 
KILL $tmp" %} + format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} ins_encode %{ __ arrays_equals($ary1$$Register, $ary2$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, - $result$$Register, $tmp$$Register, 1); + $result$$Register, $tmp5$$Register, 1); %} ins_pipe(pipe_class_memory); %} instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) %{ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); - effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} ins_encode %{ __ arrays_equals($ary1$$Register, $ary2$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, - $result$$Register, $tmp$$Register, 2); + $result$$Register, $tmp5$$Register, 2); %} ins_pipe(pipe_class_memory); %} @@ -10359,49 +10466,31 @@ instruct tlsLoadP(javaThread_RegP dst) ins_pipe(pipe_class_empty); %} -// Thread refetch: -// take two main arguments: -// 1. register @rthread -// 2. 
one register which contains the `Coroutine *` -// and move Coroutine->_thread to @rthread -instruct tlsRefetchP(javaThread_RegP dst, iRegP src) -%{ - match(Set dst (ThreadRefetch src)); - - format %{ "Refetch the rthread register" %} - - ins_encode %{ - __ ld(xthread, Address($src$$Register, Coroutine::thread_offset())); - %} - - ins_pipe(pipe_class_empty); -%} - // inlined locking and unlocking // using t1 as the 'flag' register to bridge the BoolNode producers and consumers -instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP tmp2); + effect(TEMP tmp1, TEMP tmp2); ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); - format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} + format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %} - ins_encode(riscv64_enc_fast_lock(object, box, tmp, tmp2)); + ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); ins_pipe(pipe_serial); %} // using t1 as the 'flag' register to bridge the BoolNode producers and consumers -instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ match(Set cr (FastUnlock object box)); - effect(TEMP tmp, TEMP tmp2); + effect(TEMP tmp1, TEMP tmp2); ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); - format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} + format %{ "fastunlock $object,$box\t! 
kills $tmp1, $tmp2, #@cmpFastUnlock" %} - ins_encode(riscv64_enc_fast_unlock(object, box, tmp, tmp2)); + ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); ins_pipe(pipe_serial); %} @@ -10418,7 +10507,7 @@ instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} - ins_encode(riscv64_enc_tail_call(jump_target)); + ins_encode(riscv_enc_tail_call(jump_target)); ins_pipe(pipe_class_call); %} @@ -10431,7 +10520,7 @@ instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} - ins_encode(riscv64_enc_tail_jmp(jump_target)); + ins_encode(riscv_enc_tail_jmp(jump_target)); ins_pipe(pipe_class_call); %} @@ -10463,13 +10552,13 @@ instruct RethrowException() format %{ "j rethrow_stub\t#@RethrowException" %} - ins_encode( riscv64_enc_rethrow() ); + ins_encode(riscv_enc_rethrow()); ins_pipe(pipe_class_call); %} // Return Instruction -// epilog node loads ret address into lr as part of frame pop +// epilog node loads ret address into ra as part of frame pop instruct Ret() %{ match(Return); @@ -10477,7 +10566,7 @@ instruct Ret() ins_cost(BRANCH_COST); format %{ "ret\t// return register, #@Ret" %} - ins_encode(riscv64_enc_ret()); + ins_encode(riscv_enc_ret()); ins_pipe(pipe_branch); %} @@ -10491,6 +10580,7 @@ instruct ShouldNotReachHere() %{ format %{ "#@ShouldNotReachHere" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); if (is_reachable()) { __ halt(); } diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad new file mode 100644 index 00000000000..b9e04c432e1 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_b.ad @@ -0,0 +1,451 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// RISCV Bit-Manipulation Extension Architecture Description File + +// Convert oop into int for vectors alignment masking +instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ + predicate(UseRVB); + match(Set dst (ConvL2I (CastP2X src))); + + format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// byte to int +instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ + predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + + format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// int to short +instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ + predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + + format %{ "sext.h $dst, $src\t# i2s, 
#@convI2S_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// short to unsigned int +instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ + predicate(UseRVB); + match(Set dst (AndI src mask)); + + format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// int to unsigned long (zero extend) +instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ + predicate(UseRVB); + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg_shift); +%} + +// BSWAP instructions +instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); + format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} + + ins_encode %{ + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct 
bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Shift Add Pointer +instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Shift Add Long +instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + 
as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Zeros Count instructions +instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); + format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (CountLeadingZerosL src)); + + ins_cost(ALU_COST); + format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); + format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); + format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Population Count instructions +instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); + format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Note: Long/bitCount(long) returns an int. 
+instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); + format %{ "cpop $dst, $src\t#@popCountL_rvb" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Max and Min +instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ + predicate(UseRVB); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); + format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ + predicate(UseRVB); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); + format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Abs +instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ + predicate(UseRVB); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" + "max $dst, $src, t0\t#@absI_reg_rvb" + %} + + ins_encode %{ + __ negw(t0, as_Register($src$$reg)); + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" + "max $dst, $src, t0\t#@absL_reg_rvb" + %} + + ins_encode %{ + __ neg(t0, as_Register($src$$reg)); + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + + ins_pipe(ialu_reg); +%} + +// And Not +instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ + predicate(UseRVB); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); + format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} + + 
ins_encode %{ + __ andn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ + predicate(UseRVB); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); + format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Or Not +instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ + predicate(UseRVB); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); + format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ + predicate(UseRVB); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); + format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} \ No newline at end of file diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index b5d2df2deb7..dc3ac548d73 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -71,7 +71,7 @@ class SimpleRuntimeFrame { // The frame sender code expects that fp will be in the "natural" place and // will override any oopMap setting for it. We must therefore force the layout // so that it agrees with the frame sender code. - // we don't expect any arg reg save area so riscv64 asserts that + // we don't expect any arg reg save area so riscv asserts that // frame::arg_reg_save_area_bytes == 0 fp_off = 0, fp_off2, return_off, return_off2, @@ -81,57 +81,56 @@ class SimpleRuntimeFrame { class RegisterSaver { public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - static void restore_live_registers(MacroAssembler* masm); + RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); // Offsets into the register save area // Used by deoptimization when it is managing result register // values on its own - // gregs:30, float_register:32; except: x1(ra) & x2(sp) + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) // |---f0---|<---SP // |---f1---| // | .. | // |---f31--| - // |---zr---| - // |---x3---| - // | x4 | + // |---reserved slot for stack alignment---| + // |---x5---| + // | x6 | // |---.. 
--| // |---x31--| // |---fp---| // |---ra---| - static int reg_offset_in_bytes(Register r) { - assert (r->encoding() > 2, "ra and sp not saved"); - return (32 /* floats*/ + r->encoding() - 2 /* x1, x2*/) * wordSize; + int f0_offset_in_bytes(void) { + return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + + FloatRegisterImpl::max_slots_per_register * + FloatRegisterImpl::number_of_registers * + BytesPerInt; + } + + int reg_offset_in_bytes(Register r) { + assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); + return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; + } + + int freg_offset_in_bytes(FloatRegister f) { + return f0_offset_in_bytes() + f->encoding() * wordSize; + } + + int ra_offset_in_bytes(void) { + return reserved_slot_offset_in_bytes() + + (RegisterImpl::number_of_registers - 3) * + RegisterImpl::max_slots_per_register * + BytesPerInt; } - static int x10_offset_in_bytes(void) { return reg_offset_in_bytes(x10); } // x10 - static int xmethod_offset_in_bytes(void) { return reg_offset_in_bytes(xmethod); } // x31 - static int tmp0_offset_in_bytes(void) { return reg_offset_in_bytes(t0); } // x5 - static int f0_offset_in_bytes(void) { return 0; } - static int f10_offset_in_bytes(void) { return 10 /* floats*/ * wordSize; } - static int return_offset_in_bytes(void) { return return_off * BytesPerInt; } - - // During deoptimization only the result registers need to be restored, - // all the other values have already been extracted. - static void restore_result_registers(MacroAssembler* masm); - - // Capture info about frame layout - enum layout { - fpu_state_off = 0, - fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1, - // The frame sender code expects that fp will be in - // the "natural" place and will override any oopMap - // setting for it. We must therefore force the layout - // so that it agrees with the frame sender code. 
- x0_off = fpu_state_off + FPUStateSizeInWords, - fp_off = x0_off + 30 * 2, - return_off = fp_off + 2, // slot for return address - reg_save_size = return_off + 2 - }; }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { assert_cond(masm != NULL && total_frame_words != NULL); - int frame_size_in_bytes = align_up(additional_frame_words * wordSize + reg_save_size * BytesPerInt, 16); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); // OopMap frame size is in compiler stack slots (jint's) not bytes or words int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; // The caller will allocate additional_frame_words @@ -153,20 +152,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); assert_cond(oop_maps != NULL && oop_map != NULL); - // ignore zr, ra and sp, being ignored also by push_CPU_state (pushing zr only for stack alignment) - for (int i = 3; i < RegisterImpl::number_of_registers; i++) { - Register r = as_Register(i); - if (r != xthread && r != t0 && r != t1) { - int sp_offset = 2 * ((i - 2) + 32); // SP offsets are in 4-byte words, register slots are 8 bytes - // wide, 32 floating-point registers - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg()); - } - } + int sp_offset_in_slots = 0; + int step_in_slots = 0; - for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { FloatRegister r = as_FloatRegister(i); - int sp_offset = 2 * i; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } + + step_in_slots = 
RegisterImpl::max_slots_per_register; + // skip the slot reserved for alignment, see MacroAssembler::push_reg; + // also skip x5 ~ x6 on the stack because they are caller-saved registers. + sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; + // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. + for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { + Register r = as_Register(i); + if (r != xthread) { + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); + } } return oop_map; @@ -178,37 +182,18 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) { __ leave(); } -void RegisterSaver::restore_result_registers(MacroAssembler* masm) { - // Just restore result register. Only used by deoptimization. By - // now any callee save register that needs to be restored to a c2 - // caller of the deoptee has been extracted into the vframeArray - // and will be stuffed into the c2i adapter we create for later - // restoration so only result registers need to be restored here. - assert_cond(masm != NULL); - // Restore fp result register - __ fld(f10, Address(sp, f10_offset_in_bytes())); - // Restore integer result register - __ ld(x10, Address(sp, x10_offset_in_bytes())); - - // Pop all of the register save are off the stack - __ add(sp, sp, align_up(return_offset_in_bytes(), 16)); -} - // Is vector's size (in bytes) bigger than a size saved by default? -// 8 bytes vector registers are saved by default on riscv64. bool SharedRuntime::is_wide_vector(int size) { - return size > 8; + return false; } size_t SharedRuntime::trampoline_size() { - // Byte size of function generate_trampoline. 
movptr_with_offset: 5 instructions, jalr: 1 instrction - return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr + return 6 * NativeInstruction::instruction_size; } void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { - assert_cond(masm != NULL); int32_t offset = 0; - __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli + __ movptr_with_offset(t0, destination, offset); __ jalr(x0, t0, offset); } @@ -217,9 +202,9 @@ void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destinatio // (like the placement of the register window) the slots must be biased by // the following value. static int reg2offset_in(VMReg r) { - // Account for saved fp and lr + // Account for saved fp and ra // This should really be in_preserve_stack_slots - return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; + return r->reg2stack() * VMRegImpl::stack_slot_size; } static int reg2offset_out(VMReg r) { @@ -341,11 +326,15 @@ static void patch_callers_callsite(MacroAssembler *masm) { #endif __ mv(c_rarg0, xmethod); - __ mv(c_rarg1, lr); + __ mv(c_rarg1, ra); int32_t offset = 0; __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); __ jalr(x1, t0, offset); - __ ifence(); + + // Explicit fence.i required because fixup_callers_callsite may change the code + // stream. + __ safepoint_ifence(); + __ pop_CPU_state(); // restore sp __ leave(); @@ -644,7 +633,7 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, int total_args_passed) { - assert(regs2 == NULL, "not needed on riscv64"); + assert(regs2 == NULL, "not needed on riscv"); // We return the amount of VMRegImpl stack slots we need to reserve for all // the arguments NOT counting out_preserve_stack_slots. 
@@ -910,14 +899,14 @@ void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, // which by this time is free to use switch (ret_type) { case T_FLOAT: - __ fsw(f10, Address(fp, -wordSize)); + __ fsw(f10, Address(fp, -3 * wordSize)); break; case T_DOUBLE: - __ fsd(f10, Address(fp, -wordSize)); + __ fsd(f10, Address(fp, -3 * wordSize)); break; case T_VOID: break; default: { - __ sd(x10, Address(fp, -wordSize)); + __ sd(x10, Address(fp, -3 * wordSize)); } } } @@ -928,14 +917,14 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty // which by this time is free to use switch (ret_type) { case T_FLOAT: - __ flw(f10, Address(fp, -wordSize)); + __ flw(f10, Address(fp, -3 * wordSize)); break; case T_DOUBLE: - __ fld(f10, Address(fp, -wordSize)); + __ fld(f10, Address(fp, -3 * wordSize)); break; case T_VOID: break; default: { - __ ld(x10, Address(fp, -wordSize)); + __ ld(x10, Address(fp, -3 * wordSize)); } } } @@ -975,87 +964,7 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR } } -// Check GCLocker::needs_gc and enter the runtime if it's true. This -// keeps a new JNI critical region from starting until a GC has been -// forced. Save down any oops in registers and describe them in an -// OopMap. -static void check_needs_gc_for_critical_native(MacroAssembler* masm, - int stack_slots, - int total_c_args, - int total_in_args, - int arg_save_area, - OopMapSet* oop_maps, - VMRegPair* in_regs, - BasicType* in_sig_bt) { Unimplemented(); } - -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both. 
-static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } - -class ComputeMoveOrder: public StackObj { - class MoveOperation: public ResourceObj { - friend class ComputeMoveOrder; - private: - VMRegPair _src; - VMRegPair _dst; - int _src_index; - int _dst_index; - bool _processed; - MoveOperation* _next; - MoveOperation* _prev; - - static int get_id(VMRegPair r) { Unimplemented(); return 0; } - - public: - MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): - _src(src) - , _dst(dst) - , _src_index(src_index) - , _dst_index(dst_index) - , _processed(false) - , _next(NULL) - , _prev(NULL) { Unimplemented(); } - - ~MoveOperation() { - _next = NULL; - _prev = NULL; - } - - VMRegPair src() const { Unimplemented(); return _src; } - int src_id() const { Unimplemented(); return 0; } - int src_index() const { Unimplemented(); return 0; } - VMRegPair dst() const { Unimplemented(); return _src; } - void set_dst(int i, VMRegPair dst) { Unimplemented(); } - int dst_index() const { Unimplemented(); return 0; } - int dst_id() const { Unimplemented(); return 0; } - MoveOperation* next() const { Unimplemented(); return 0; } - MoveOperation* prev() const { Unimplemented(); return 0; } - void set_processed() { Unimplemented(); } - bool is_processed() const { Unimplemented(); return 0; } - - // insert - void break_cycle(VMRegPair temp_register) { Unimplemented(); } - - void link(GrowableArray& killer) { Unimplemented(); } - }; - - private: - GrowableArray edges; - - public: - ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, - BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } - - ~ComputeMoveOrder() {} - // Collected all the move operations - void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } - - // Walk the edges breaking cycles 
between moves. The result list - // can be walked in order to produce the proper set of loads - GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -}; - -static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) { +static void rt_call(MacroAssembler* masm, address dest) { assert_cond(masm != NULL); CodeBlob *cb = CodeCache::find_blob(dest); if (cb) { @@ -1064,7 +973,6 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int32_t offset = 0; __ la_patchable(t0, RuntimeAddress(dest), offset); __ jalr(x1, t0, offset); - __ ifence(); } } @@ -1208,12 +1116,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(-1), (OopMapSet*)NULL); } - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); // An OopMap for lock (and class if static) @@ -1228,70 +1131,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args; - if (!is_critical_native) { - total_c_args += 1; - if (method->is_static()) { - total_c_args++; - } - } else { - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args++; - } - } - } + int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); - assert_cond(out_sig_bt != NULL && out_regs != NULL); BasicType* in_elem_bt = NULL; int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args ; i++) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - Thread* THREAD = Thread::current(); - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); - assert_cond(in_elem_bt != NULL); - SignatureStream ss(method->signature()); - for (int i = 0; i < total_in_args ; i++) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair - out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - Symbol* atype = ss.as_symbol(CHECK_NULL); - const char* at = atype->as_C_string(); - if (strlen(at) == 2) { - assert(at[0] == '[', "must be"); - switch (at[1]) { - case 'B': in_elem_bt[i] = T_BYTE; break; - case 'C': in_elem_bt[i] = T_CHAR; break; - case 'D': in_elem_bt[i] = T_DOUBLE; break; - case 'F': in_elem_bt[i] = T_FLOAT; break; - case 'I': in_elem_bt[i] = T_INT; break; - case 'J': in_elem_bt[i] = T_LONG; break; - case 'S': in_elem_bt[i] = T_SHORT; break; - case 'Z': in_elem_bt[i] = T_BOOLEAN; break; - default: ShouldNotReachHere(); - } - } - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - in_elem_bt[i] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type(), "must match"); - ss.next(); - } - } + for (int i = 0; i < total_in_args ; i++) { + out_sig_bt[argc++] = in_sig_bt[i]; } // Now figure out where the args must be stored and how much stack space @@ -1308,34 +1161,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now the space for the inbound oop handle area int total_save_slots = 8 * 
VMRegImpl::slots_per_word; // 8 arguments passed in registers - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. - int double_slots = 0; - int single_slots = 0; - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: single_slots++; break; - case T_ARRAY: // specific to LP64 (7145024) - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - ShouldNotReachHere(); - } - } - total_save_slots = double_slots * 2 + single_slots; - // align the save area - if (double_slots != 0) { - stack_slots = align_up(stack_slots, 2); - } - } int oop_handle_offset = stack_slots; stack_slots += total_save_slots; @@ -1369,6 +1194,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // // // FP-> | | + // | 2 slots (ra) | + // | 2 slots (fp) | // |---------------------| // | 2 slots for moves | // |---------------------| @@ -1425,11 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ nop(); // Generate stack overflow check - if (UseStackBanging) { - __ bang_stack_with_offset(JavaThread::stack_shadow_zone_size()); - } else { - Unimplemented(); - } + __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); // Generate a new frame for the wrapper. __ enter(); @@ -1444,10 +1267,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register oop_handle_reg = x18; - if (is_critical_native) { - check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, - oop_handle_offset, oop_maps, in_regs, in_sig_bt); - } // // We immediately shuffle the arguments so that any vm call we have to // make from here on out (sync slow path, jvmti, etc.) 
we will have @@ -1492,22 +1311,14 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ - // This may iterate in two different directions depending on the - // kind of native it is. The reason is that for regular JNI natives - // the incoming and outgoing registers are offset upwards and for - // critical natives they are offset down. + // For JNI natives the incoming and outgoing registers are offset upwards. GrowableArray arg_order(2 * total_in_args); VMRegPair tmp_vmreg; tmp_vmreg.set2(x9->as_VMReg()); - if (!is_critical_native) { - for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { - arg_order.push(i); - arg_order.push(c_arg); - } - } else { - // Compute a valid move order, using tmp_vmreg to break any cycles - ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); } int temploc = -1; @@ -1515,20 +1326,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int i = arg_order.at(ai); int c_arg = arg_order.at(ai + 1); __ block_comment(err_msg("mv %d -> %d", i, c_arg)); - if (c_arg == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // This arg needs to be moved to a temporary - __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); - in_regs[i] = tmp_vmreg; - temploc = i; - continue; - } else if (i == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // Read from the temporary location - assert(temploc != -1, "must be valid"); - i = temploc; - temploc = -1; - } + assert(c_arg != -1 && i != -1, "wrong order"); #ifdef ASSERT if (in_regs[i].first()->is_Register()) { assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -1543,22 +1341,7 @@ nmethod* 
SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ switch (in_sig_bt[i]) { case T_ARRAY: - if (is_critical_native) { - unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); - c_arg++; -#ifdef ASSERT - if (out_regs[c_arg].first()->is_Register()) { - reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; - } else if (out_regs[c_arg].first()->is_FloatRegister()) { - freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; - } -#endif - int_args++; - break; - } - // no break case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ((i == 0) && (!is_static)), &receiver_offset); @@ -1600,7 +1383,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int c_arg = total_c_args - total_in_args; // Pre-load a static method's oop into c_rarg1. - if (method->is_static() && !is_critical_native) { + if (method->is_static()) { // load oop into a register __ movoop(c_rarg1, @@ -1653,13 +1436,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register obj_reg = x9; // Will contain the oop const Register lock_reg = x30; // Address of compiler lock object (BasicLock) const Register old_hdr = x30; // value of old header at unlock time - const Register tmp = lr; + const Register tmp = ra; Label slow_path_lock; Label lock_done; if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); @@ -1707,7 +1489,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bnez(swap_reg, slow_path_lock); // Slow path will re-enter here - __ bind(lock_done); } @@ -1715,9 +1496,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Finally just about ready to make the JNI call // get JNIEnv* which is first argument to 
native - if (!is_critical_native) { - __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); - } + __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); // Now set thread in native __ la(t1, Address(xthread, JavaThread::thread_state_offset())); @@ -1725,33 +1504,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); __ sw(t0, Address(t1)); - { - int return_type = 0; - switch (ret_type) { - case T_VOID: break; - return_type = 0; break; - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_BOOLEAN: - case T_LONG: - return_type = 1; break; - case T_ARRAY: - case T_OBJECT: - return_type = 1; break; - case T_FLOAT: - return_type = 2; break; - case T_DOUBLE: - return_type = 3; break; - default: - ShouldNotReachHere(); - } - rt_call(masm, native_func, - int_args + 2, // riscv64 passes up to 8 args in int registers - float_args, // and up to 8 float args - return_type); - } + rt_call(masm, native_func); __ bind(native_return); @@ -1759,10 +1512,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, oop_maps->add_gc_map(return_pc - start, map); // Unpack native results. - if(ret_type != T_OBJECT && ret_type != T_ARRAY) { + if (ret_type != T_OBJECT && ret_type != T_ARRAY) { __ cast_primitive_type(ret_type, x10); } + Label safepoint_in_progress, safepoint_in_progress_done; + Label after_transition; + // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -1772,29 +1528,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // didn't see any synchronization is progress, and escapes. 
__ mv(t0, _thread_in_native_trans); - if(os::is_MP()) { - if (UseMembar) { - __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); - // Force this write out before the read below - __ membar(MacroAssembler::AnyAny); - } else { - __ la(t1, Address(xthread, JavaThread::thread_state_offset())); - __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - __ sw(t0, Address(t1)); - - // Write serialization page so VM thread can do a pseudo remote membar. - // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. - __ serialize_memory(xthread, x12, t0); - } - } else { - __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); - } + // Force this write out before the read below + __ membar(MacroAssembler::AnyAny); // check for safepoint operation in progress and/or pending suspend requests - Label safepoint_in_progress, safepoint_in_progress_done; { __ safepoint_poll_acquire(safepoint_in_progress); __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); @@ -1803,7 +1542,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // change thread state - Label after_transition; __ la(t1, Address(xthread, JavaThread::thread_state_offset())); __ mv(t0, _thread_in_Java); __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); @@ -1834,7 +1572,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // Simple recursive lock? 
- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ beqz(t0, done); @@ -1843,7 +1580,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, save_native_result(masm, ret_type, stack_slots); } - // get address of the stack lock __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); // get old displaced header @@ -1884,32 +1620,26 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); } - if (!is_critical_native) { - // reset handle block - __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); - __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); - } + // reset handle block + __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); + __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); __ leave(); - if (!is_critical_native) { - // Any exception pending? - __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); - __ bnez(t0, exception_pending); - } + // Any exception pending? 
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ bnez(t0, exception_pending); // We're done __ ret(); // Unexpected paths are out of line and go here - if (!is_critical_native) { - // forward the exception - __ bind(exception_pending); + // forward the exception + __ bind(exception_pending); - // and forward the exception - __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - } + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // Slow path locking & unlocking if (method->is_synchronized()) { @@ -1946,7 +1676,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ block_comment("Slow path unlock {"); __ bind(slow_path_unlock); - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { save_native_result(masm, ret_type, stack_slots); } @@ -1959,7 +1689,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); #ifdef ASSERT { @@ -1973,7 +1703,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { restore_native_result(masm, ret_type, stack_slots); } __ j(unlock_done); @@ -1986,7 +1716,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bind(reguard); save_native_result(masm, ret_type, stack_slots); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0); + rt_call(masm, 
CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); restore_native_result(masm, ret_type, stack_slots); // and continue __ j(reguard_done); @@ -2005,22 +1735,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif int32_t offset = 0; - if (!is_critical_native) { - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); - } else { - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); - } + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); __ jalr(x1, t0, offset); - __ ifence(); + // Restore any method result value restore_native_result(masm, ret_type, stack_slots); - if (is_critical_native) { - // The call above performed the transition to thread_in_Java so - // skip the transition logic above. - __ j(after_transition); - } - __ j(safepoint_in_progress_done); __ block_comment("} safepoint"); } @@ -2068,9 +1788,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), oop_maps); assert(nm != NULL, "create native nmethod fail!"); - if (is_critical_native) { - nm->set_lazy_critical_native(true); - } return nm; } @@ -2099,6 +1816,7 @@ void SharedRuntime::generate_deopt_blob() { OopMap* map = NULL; OopMapSet *oop_maps = new OopMapSet(); assert_cond(masm != NULL && oop_maps != NULL); + RegisterSaver reg_saver; // ------------- // This code enters when returning to a de-optimized nmethod. A return @@ -2112,7 +1830,7 @@ void SharedRuntime::generate_deopt_blob() { // In the case of an exception pending when deoptimizing, we enter // with a return address on the stack that points after the call we patched // into the exception handler. 
We have the following register state from, - // e.g., the forward exception stub (see stubGenerator_riscv64.cpp). + // e.g., the forward exception stub (see stubGenerator_riscv.cpp). // x10: exception oop // x9: exception handler // x13: throwing pc @@ -2136,7 +1854,7 @@ void SharedRuntime::generate_deopt_blob() { // Prolog for non exception case! // Save everything in sight. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); // Normal deoptimization. Save exec mode for unpack_frames. __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved @@ -2148,7 +1866,7 @@ void SharedRuntime::generate_deopt_blob() { // return address is the pc describes what bci to do re-execute at // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved __ j(cont); @@ -2185,7 +1903,7 @@ void SharedRuntime::generate_deopt_blob() { // This is a somewhat fragile mechanism. // Save everything in sight. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); // Now it is safe to overwrite any register @@ -2196,7 +1914,7 @@ void SharedRuntime::generate_deopt_blob() { // of the current frame. Then clear the field in JavaThread __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); - __ sd(x13, Address(fp, wordSize)); + __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); #ifdef ASSERT @@ -2261,7 +1979,7 @@ void SharedRuntime::generate_deopt_blob() { __ verify_oop(x10); // Overwrite the result registers with the exception results. 
- __ sd(x10, Address(sp, RegisterSaver::x10_offset_in_bytes())); + __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); __ bind(noException); @@ -2269,7 +1987,13 @@ void SharedRuntime::generate_deopt_blob() { // Now restore the result registers. Everything else is either dead // or captured in the vframeArray. - RegisterSaver::restore_result_registers(masm); + // Restore fp result register + __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); + // Restore integer result register + __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + + // Pop all of the register save area off the stack + __ add(sp, sp, frame_size_in_words * wordSize); // All of the register save area has been popped of the stack. Only the // return address remains. @@ -2290,18 +2014,16 @@ void SharedRuntime::generate_deopt_blob() { __ sub(x12, x12, 2 * wordSize); __ add(sp, sp, x12); __ ld(fp, Address(sp, 0)); - __ ld(lr, Address(sp, wordSize)); + __ ld(ra, Address(sp, wordSize)); __ addi(sp, sp, 2 * wordSize); - // LR should now be the return address to the caller (3) + // RA should now be the return address to the caller (3) #ifdef ASSERT // Compilers generate code that bang the stack by as much as the // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. 
- if (UseStackBanging) { - __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); - __ bang_stack_size(x9, x12); - } + __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(x9, x12); #endif // Load address of array of frame pcs into x12 __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -2333,7 +2055,7 @@ void SharedRuntime::generate_deopt_blob() { __ ld(x9, Address(x14, 0)); // Load frame size __ addi(x14, x14, wordSize); __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand - __ ld(lr, Address(x12, 0)); // Load pc + __ ld(ra, Address(x12, 0)); // Load pc __ addi(x12, x12, wordSize); __ enter(); // Save old & set new fp __ sub(sp, sp, x9); // Prolog @@ -2345,7 +2067,7 @@ void SharedRuntime::generate_deopt_blob() { __ bnez(x13, loop); // Re-push self-frame - __ ld(lr, Address(x12)); + __ ld(ra, Address(x12)); __ enter(); // Allocate a full sized register save area. We subtract 2 because @@ -2353,8 +2075,8 @@ void SharedRuntime::generate_deopt_blob() { __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); // Restore frame locals after moving the frame - __ fsd(f10, Address(sp, RegisterSaver::f10_offset_in_bytes())); - __ sd(x10, Address(sp, RegisterSaver::x10_offset_in_bytes())); + __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); + __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); // Call C code. Need thread but NOT official VM entry // crud. We cannot block on this call, no GC can happen. 
Call should @@ -2376,14 +2098,14 @@ void SharedRuntime::generate_deopt_blob() { // Set an oopmap for the call site // Use the same PC we used for the last java frame oop_maps->add_gc_map(the_pc - start, - new OopMap( frame_size_in_words, 0 )); + new OopMap(frame_size_in_words, 0)); // Clear fp AND pc __ reset_last_Java_frame(true); // Collect return values - __ fld(f10, Address(sp, RegisterSaver::f10_offset_in_bytes())); - __ ld(x10, Address(sp, RegisterSaver::x10_offset_in_bytes())); + __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); + __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); // Pop self-frame. __ leave(); // Epilog @@ -2417,16 +2139,15 @@ void SharedRuntime::generate_uncommon_trap_blob() { address start = __ pc(); - // Push self-frame. We get here with a return address in LR + // Push self-frame. We get here with a return address in RA // and sp should be 16 byte aligned // push fp and retaddr by hand __ addi(sp, sp, -2 * wordSize); - __ sd(lr, Address(sp, wordSize)); + __ sd(ra, Address(sp, wordSize)); __ sd(fp, Address(sp, 0)); // we don't expect an arg reg save area #ifndef PRODUCT - assert(frame::arg_reg_save_area_bytes == 0, "no" - "ame reg save area"); + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif // compiler left unloaded_class_index in j_rarg0 move to where the // runtime expects it. 
@@ -2460,7 +2181,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); assert_cond(oop_maps != NULL && map != NULL); - // location of rfp is known implicitly by the frame sender code + // location of fp is known implicitly by the frame sender code oop_maps->add_gc_map(__ pc() - start, map); @@ -2495,20 +2216,18 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ sub(x12, x12, 2 * wordSize); __ add(sp, sp, x12); __ ld(fp, sp, 0); - __ ld(lr, sp, wordSize); + __ ld(ra, sp, wordSize); __ addi(sp, sp, 2 * wordSize); - // LR should now be the return address to the caller (3) frame + // RA should now be the return address to the caller (3) frame #ifdef ASSERT // Compilers generate code that bang the stack by as much as the // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. - if (UseStackBanging) { - __ lwu(x11, Address(x14, - Deoptimization::UnrollBlock:: - total_frame_sizes_offset_in_bytes())); - __ bang_stack_size(x11, x12); - } + __ lwu(x11, Address(x14, + Deoptimization::UnrollBlock:: + total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(x11, x12); #endif // Load address of array of frame pcs into x12 (address*) @@ -2543,7 +2262,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ bind(loop); __ ld(x11, Address(x15, 0)); // Load frame size __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand - __ ld(lr, Address(x12, 0)); // Save return address + __ ld(ra, Address(x12, 0)); // Save return address __ enter(); // and old fp & set new fp __ sub(sp, sp, x11); // Prolog __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable @@ -2554,7 +2273,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ add(x12, x12, wordSize); // Bump array pointer (pcs) __ subw(x13, x13, 1); // Decrement counter __ bgtz(x13, loop); - __ ld(lr, Address(x12, 0)); // save 
final return address + __ ld(ra, Address(x12, 0)); // save final return address // Re-push self-frame __ enter(); // & old fp & set new fp @@ -2621,9 +2340,10 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t address call_pc = NULL; int frame_size_in_words = -1; bool cause_return = (poll_type == POLL_AT_RETURN); + RegisterSaver reg_saver; // Save Integer and Float registers. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -2641,7 +2361,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // it later to determine if someone changed the return address for // us! __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); - __ sd(x18, Address(fp, wordSize)); + __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); } // Do the call @@ -2669,7 +2389,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // Exception pending - RegisterSaver::restore_live_registers(masm); + reg_saver.restore_live_registers(masm); __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); @@ -2679,7 +2399,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t Label no_adjust, bail; if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { // If our stashed return pc was modified by the runtime we avoid touching it - __ ld(t0, Address(fp, wordSize)); + __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); __ bne(x18, t0, no_adjust); #ifdef ASSERT @@ -2699,13 +2419,13 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t #endif // Adjust return pc forward to step over the safepoint poll instruction __ add(x18, x18, 
NativeInstruction::instruction_size); - __ sd(x18, Address(fp, wordSize)); + __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); } __ bind(no_adjust); // Normal exit, restore registers and exit. - RegisterSaver::restore_live_registers(masm); + reg_saver.restore_live_registers(masm); __ ret(); #ifdef ASSERT @@ -2739,6 +2459,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha assert_cond(masm != NULL); int frame_size_in_words = -1; + RegisterSaver reg_saver; OopMapSet *oop_maps = new OopMapSet(); assert_cond(oop_maps != NULL); @@ -2746,7 +2467,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha int start = __ offset(); - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); int frame_complete = __ offset(); @@ -2767,8 +2488,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha oop_maps->add_gc_map( __ offset() - start, map); - __ ifence(); - // x10 contains the address we are going to jump to assuming no exception got installed // clear last_Java_sp @@ -2780,11 +2499,11 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha // get the returned Method* __ get_vm_result_2(xmethod, xthread); - __ sd(xmethod, Address(sp, RegisterSaver::reg_offset_in_bytes(xmethod))); + __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); // x10 is where we want to jump, overwrite t0 which is saved and temporary - __ sd(x10, Address(sp, RegisterSaver::tmp0_offset_in_bytes())); - RegisterSaver::restore_live_registers(masm); + __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); + reg_saver.restore_live_registers(masm); // We are back the the original state on entry and ready to go. 
@@ -2794,7 +2513,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha __ bind(pending); - RegisterSaver::restore_live_registers(masm); + reg_saver.restore_live_registers(masm); // exception pending => remove activation and forward to exception handler @@ -2812,12 +2531,10 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha } #ifdef COMPILER2 -// This is here instead of runtime_riscv64.cpp because it uses SimpleRuntimeFrame -// //------------------------------generate_exception_blob--------------------------- // creates exception blob at the end // Using exception blob, this code is jumped from a compiled method. -// (see emit_exception_handler in riscv64.ad file) +// (see emit_exception_handler in riscv.ad file) // // Given an exception pc at a call we call into the runtime for the // handler in this method. This handler might merely restore state @@ -2863,7 +2580,7 @@ void OptoRuntime::generate_exception_blob() { // push fp and retaddr by hand // Exception pc is 'return address' for stack walker __ addi(sp, sp, -2 * wordSize); - __ sd(lr, Address(sp, wordSize)); + __ sd(ra, Address(sp, wordSize)); __ sd(fp, Address(sp)); // there are no callee save registers and we don't expect an // arg reg save area @@ -2894,7 +2611,6 @@ void OptoRuntime::generate_exception_blob() { __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); __ jalr(x1, t0, offset); - __ ifence(); // handle_exception_C is a special VM call which does not require an explicit // instruction sync afterwards. diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 26f39fa4a21..9970229c5c5 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. 
All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,6 +48,9 @@ #ifdef COMPILER2 #include "opto/runtime.hpp" #endif +#if INCLUDE_ZGC +#include "gc/z/zThreadLocalData.hpp" +#endif // Declaration and definition of StubGenerator (no .hpp file). // For a more detailed description of the stub routine structure @@ -98,7 +101,7 @@ class StubGenerator: public StubCodeGenerator { // There is no return from the stub itself as any Java result // is written to result // - // we save x1 (lr) as the return PC at the base of the frame and + // we save x1 (ra) as the return PC at the base of the frame and // link x8 (fp) below it as the frame pointer installing sp (x2) // into fp. // @@ -123,80 +126,81 @@ class StubGenerator: public StubCodeGenerator { // [ return_from_Java ] <--- sp // [ argument word n ] // ... 
- // -32 [ argument word 1 ] - // -31 [ saved f27 ] <--- sp_after_call - // -30 [ saved f26 ] - // -29 [ saved f25 ] - // -28 [ saved f24 ] - // -27 [ saved f23 ] - // -26 [ saved f22 ] - // -25 [ saved f21 ] - // -24 [ saved f20 ] - // -23 [ saved f19 ] - // -22 [ saved f18 ] - // -21 [ saved f9 ] - // -20 [ saved f8 ] - // -19 [ saved x27 ] - // -18 [ saved x26 ] - // -17 [ saved x25 ] - // -16 [ saved x24 ] - // -15 [ saved x23 ] - // -14 [ saved x22 ] - // -13 [ saved x21 ] - // -12 [ saved x20 ] - // -11 [ saved x19 ] - // -10 [ saved x18 ] - // -9 [ saved x9 ] - // -8 [ call wrapper (x10) ] - // -7 [ result (x11) ] - // -6 [ result type (x12) ] - // -5 [ method (x13) ] - // -4 [ entry point (x14) ] - // -3 [ parameters (x15) ] - // -2 [ parameter size (x16) ] - // -1 [ thread (x17) ] - // 0 [ saved fp (x8) ] <--- fp == saved sp (x2) - // 1 [ saved lr (x1) ] + // -34 [ argument word 1 ] + // -33 [ saved f27 ] <--- sp_after_call + // -32 [ saved f26 ] + // -31 [ saved f25 ] + // -30 [ saved f24 ] + // -29 [ saved f23 ] + // -28 [ saved f22 ] + // -27 [ saved f21 ] + // -26 [ saved f20 ] + // -25 [ saved f19 ] + // -24 [ saved f18 ] + // -23 [ saved f9 ] + // -22 [ saved f8 ] + // -21 [ saved x27 ] + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] + // -17 [ saved x23 ] + // -16 [ saved x22 ] + // -15 [ saved x21 ] + // -14 [ saved x20 ] + // -13 [ saved x19 ] + // -12 [ saved x18 ] + // -11 [ saved x9 ] + // -10 [ call wrapper (x10) ] + // -9 [ result (x11) ] + // -8 [ result type (x12) ] + // -7 [ method (x13) ] + // -6 [ entry point (x14) ] + // -5 [ parameters (x15) ] + // -4 [ parameter size (x16) ] + // -3 [ thread (x17) ] + // -2 [ saved fp (x8) ] + // -1 [ saved ra (x1) ] + // 0 [ ] <--- fp == saved sp (x2) // Call stub stack layout word offsets from fp enum call_stub_layout { - sp_after_call_off = -31, - - f27_off = -31, - f26_off = -30, - f25_off = -29, - f24_off = -28, - f23_off = -27, - f22_off = -26, - f21_off = -25, - f20_off = 
-24, - f19_off = -23, - f18_off = -22, - f9_off = -21, - f8_off = -20, - - x27_off = -19, - x26_off = -18, - x25_off = -17, - x24_off = -16, - x23_off = -15, - x22_off = -14, - x21_off = -13, - x20_off = -12, - x19_off = -11, - x18_off = -10, - x9_off = -9, - - call_wrapper_off = -8, - result_off = -7, - result_type_off = -6, - method_off = -5, - entry_point_off = -4, - parameters_off = -3, - parameter_size_off = -2, - thread_off = -1, - fp_f = 0, - retaddr_off = 1, + sp_after_call_off = -33, + + f27_off = -33, + f26_off = -32, + f25_off = -31, + f24_off = -30, + f23_off = -29, + f22_off = -28, + f21_off = -27, + f20_off = -26, + f19_off = -25, + f18_off = -24, + f9_off = -23, + f8_off = -22, + + x27_off = -21, + x26_off = -20, + x25_off = -19, + x24_off = -18, + x23_off = -17, + x22_off = -16, + x21_off = -15, + x20_off = -14, + x19_off = -13, + x18_off = -12, + x9_off = -11, + + call_wrapper_off = -10, + result_off = -9, + result_type_off = -8, + method_off = -7, + entry_point_off = -6, + parameters_off = -5, + parameter_size_off = -4, + thread_off = -3, + fp_f = -2, + retaddr_off = -1, }; address generate_call_stub(address& return_address) { @@ -247,7 +251,7 @@ class StubGenerator: public StubCodeGenerator { // stub code - address riscv64_entry = __ pc(); + address riscv_entry = __ pc(); // set up frame and move sp to end of save area __ enter(); @@ -446,7 +450,7 @@ class StubGenerator: public StubCodeGenerator { // Note: Usually the parameters are removed by the callee. In case // of an exception crossing an activation frame boundary, that is // not the case if the callee is compiled code => need to setup the - // rsp. + // sp. // // x10: exception oop @@ -497,7 +501,7 @@ class StubGenerator: public StubCodeGenerator { // x10: exception // x13: throwing pc // - // NOTE: At entry of this stub, exception-pc must be in LR !! + // NOTE: At entry of this stub, exception-pc must be in RA !! 
// NOTE: this is always used as a jump target within generated code // so it just needs to be generated code with no x86 prolog @@ -506,7 +510,7 @@ class StubGenerator: public StubCodeGenerator { StubCodeMark mark(this, "StubRoutines", "forward exception"); address start = __ pc(); - // Upon entry, LR points to the return address returning into + // Upon entry, RA points to the return address returning into // Java (interpreted or compiled) code; i.e., the return address // becomes the throwing pc. // @@ -530,24 +534,24 @@ class StubGenerator: public StubCodeGenerator { // call the VM to find the handler address associated with the // caller address. pass thread in x10 and caller pc (ret address) - // in x11. n.b. the caller pc is in lr, unlike x86 where it is on + // in x11. n.b. the caller pc is in ra, unlike x86 where it is on // the stack. - __ mv(c_rarg1, lr); - // lr will be trashed by the VM call so we move it to x9 + __ mv(c_rarg1, ra); + // ra will be trashed by the VM call so we move it to x9 // (callee-saved) because we also need to pass it to the handler // returned by this call. - __ mv(x9, lr); + __ mv(x9, ra); BLOCK_COMMENT("call exception_handler_for_return_address"); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, c_rarg1); - // we should not really care that lr is no longer the callee + // we should not really care that ra is no longer the callee // address. we saved the value the handler needs in x9 so we can // just copy it to x13. however, the C2 handler will push its own // frame and then calls into the VM and the VM code asserts that // the PC for the frame above the handler belongs to a compiled - // Java method. So, we restore lr here to satisfy that assert. - __ mv(lr, x9); + // Java method. So, we restore ra here to satisfy that assert. 
+ __ mv(ra, x9); // setup x10 & x13 & clear pending exception __ mv(x13, x9); __ mv(x9, x10); @@ -583,7 +587,7 @@ class StubGenerator: public StubCodeGenerator { // Stack after saving c_rarg3: // [tos + 0]: saved c_rarg3 // [tos + 1]: saved c_rarg2 - // [tos + 2]: saved lr + // [tos + 2]: saved ra // [tos + 3]: saved t1 // [tos + 4]: saved x10 // [tos + 5]: saved t0 @@ -630,7 +634,7 @@ class StubGenerator: public StubCodeGenerator { __ pusha(); // debug(char* msg, int64_t pc, int64_t regs[]) __ mv(c_rarg0, t0); // pass address of error message - __ mv(c_rarg1, lr); // pass return address + __ mv(c_rarg1, ra); // pass return address __ mv(c_rarg2, sp); // pass address of regs on stack #ifndef PRODUCT assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); @@ -888,7 +892,7 @@ class StubGenerator: public StubCodeGenerator { const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); - Assembler::SEW sew = Assembler::elemBytes_to_sew(granularity); + Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); Label loop_forward, loop_backward, done; __ mv(dst, d); @@ -928,7 +932,6 @@ class StubGenerator: public StubCodeGenerator { void copy_memory(bool is_aligned, Register s, Register d, Register count, Register tmp, int step) { - if (UseRVV) { return copy_memory_v(s, d, count, tmp, step); } @@ -1039,7 +1042,7 @@ class StubGenerator: public StubCodeGenerator { // Scan over array at a for count oops, verifying each one. // Preserves a and count, clobbers t0 and t1. - void verify_oop_array (size_t size, Register a, Register count, Register temp) { + void verify_oop_array(size_t size, Register a, Register count, Register temp) { Label loop, end; __ mv(t1, zr); __ slli(t0, count, exact_log2(size)); @@ -1598,8 +1601,8 @@ class StubGenerator: public StubCodeGenerator { __ bgtu(temp, t0, L_failed); // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. 
- __ clear_upper_bits(src_pos, 32); - __ clear_upper_bits(dst_pos, 32); + __ zero_extend(src_pos, src_pos, 32); + __ zero_extend(dst_pos, dst_pos, 32); BLOCK_COMMENT("arraycopy_range_checks done"); } @@ -1813,8 +1816,8 @@ class StubGenerator: public StubCodeGenerator { // Get array_header_in_bytes() int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; - __ slli(t0_offset, lh, registerSize - lh_header_size_msb); // left shift to remove 24 ~ 32; - __ srli(t0_offset, t0_offset, registerSize - lh_header_size_width); // array_offset + __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; + __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset __ add(src, src, t0_offset); // src array offset __ add(dst, dst, t0_offset); // dst array offset @@ -1843,20 +1846,16 @@ class StubGenerator: public StubCodeGenerator { __ j(RuntimeAddress(byte_copy_entry)); __ BIND(L_copy_shorts); - __ slli(t0, src_pos, 1); - __ add(from, src, t0); // src_addr - __ slli(t0, dst_pos, 1); - __ add(to, dst, t0); // dst_addr + __ shadd(from, src_pos, src, t0, 1); // src_addr + __ shadd(to, dst_pos, dst, t0, 1); // dst_addr __ addw(count, scratch_length, zr); // length __ j(RuntimeAddress(short_copy_entry)); __ BIND(L_copy_ints); __ andi(t0, x22_elsize, 1); __ bnez(t0, L_copy_longs); - __ slli(t0, src_pos, 2); - __ add(from, src, t0); // src_addr - __ slli(t0, dst_pos, 2); - __ add(to, dst, t0); // dst_addr + __ shadd(from, src_pos, src, t0, 2); // src_addr + __ shadd(to, dst_pos, dst, t0, 2); // dst_addr __ addw(count, scratch_length, zr); // length __ j(RuntimeAddress(int_copy_entry)); @@ -1874,10 +1873,8 @@ class StubGenerator: public StubCodeGenerator { BLOCK_COMMENT("} assert long copy done"); } #endif - __ slli(t0, src_pos, 3); - __ add(from, src, t0); // src_addr - __ slli(t0, dst_pos, 3); - __ add(to, dst, t0); // dst_addr + __ shadd(from, 
src_pos, src, t0, 3); // src_addr + __ shadd(to, dst_pos, dst, t0, 3); // dst_addr __ addw(count, scratch_length, zr); // length __ j(RuntimeAddress(long_copy_entry)); @@ -1894,11 +1891,9 @@ class StubGenerator: public StubCodeGenerator { arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, t1, L_failed); - __ slli(t0, src_pos, LogBytesPerHeapOop); - __ add(from, t0, src); + __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ slli(t0, dst_pos, LogBytesPerHeapOop); - __ add(to, t0, dst); + __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ addw(count, scratch_length, zr); // length __ BIND(L_plain_copy); @@ -1919,11 +1914,9 @@ class StubGenerator: public StubCodeGenerator { __ load_klass(dst_klass, dst); // reload // Marshal the base address arguments now, freeing registers. - __ slli(t0, src_pos, LogBytesPerHeapOop); - __ add(from, t0, src); + __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ slli(t0, dst_pos, LogBytesPerHeapOop); - __ add(to, t0, dst); + __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ addw(count, length, zr); // length (reloaded) const Register sco_temp = c_rarg3; // this register is free now @@ -2084,8 +2077,7 @@ class StubGenerator: public StubCodeGenerator { // Note that the total length is no less than 8 bytes. 
if (t == T_BYTE || t == T_SHORT) { __ beqz(count, L_exit1); - __ slli(tmp_reg, count, shift); - __ add(to, to, tmp_reg); // points to the end + __ shadd(to, count, to, tmp_reg, shift); // points to the end __ sd(value, Address(to, -8)); // overwrite some elements __ bind(L_exit1); __ leave(); @@ -2145,7 +2137,7 @@ class StubGenerator: public StubCodeGenerator { generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); - StubRoutines::riscv64::_zero_blocks = generate_zero_blocks(); + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); //*** jbyte // Always need aligned and unaligned versions @@ -2354,11 +2346,9 @@ class StubGenerator: public StubCodeGenerator { if (isLU) { __ add(str1, str1, cnt2); - __ slli(t0, cnt2, 1); - __ add(str2, str2, t0); + __ shadd(str2, cnt2, str2, t0, 1); } else { - __ slli(t0, cnt2, 1); - __ add(str1, str1, t0); + __ shadd(str1, cnt2, str1, t0, 1); __ add(str2, str2, cnt2); } __ xorr(tmp3, tmp1, tmp2); @@ -2387,9 +2377,10 @@ class StubGenerator: public StubCodeGenerator { __ addi(t0, cnt2, 16); __ beqz(t0, LOAD_LAST); __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) - __ slli(t0, cnt2, 1); - __ add(cnt1, cnt1, t0); // Address of 8 bytes before last 4 characters in UTF-16 string - __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string + // Address of 8 bytes before last 4 characters in UTF-16 string + __ shadd(cnt1, cnt2, cnt1, t0, 1); + // Address of 16 bytes before last 4 characters in Latin1 string + __ add(tmp2, tmp2, cnt2); __ ld(tmp4, Address(cnt1, -8)); // last 16 characters before last load compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); @@ -2521,10 +2512,10 @@ class StubGenerator: public StubCodeGenerator { } void generate_compare_long_strings() { - StubRoutines::riscv64::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); - 
StubRoutines::riscv64::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); - StubRoutines::riscv64::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); - StubRoutines::riscv64::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); + StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); + StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); + StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); + StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); } // x10 result @@ -2549,7 +2540,7 @@ class StubGenerator: public StubCodeGenerator { // parameters Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; // temporary registers - Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; + Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; // redefinitions Register ch1 = x28, ch2 = x29; RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); @@ -2570,9 +2561,13 @@ class StubGenerator: public StubCodeGenerator { // first is needle[0] __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); - __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + __ mv(mask1, haystack_isL ? mask0101 : mask0001); __ mul(first, first, mask1); - __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); if (needle_isL != haystack_isL) { __ mv(tmp, ch1); } @@ -2580,7 +2575,7 @@ class StubGenerator: public StubCodeGenerator { __ blez(haystack_len, L_SMALL); if (needle_isL != haystack_isL) { - __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); + __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); } // xorr, sub, orr, notr, andr // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] @@ -2617,7 +2612,7 @@ class StubGenerator: public StubCodeGenerator { __ xorr(ch2, first, ch2); __ sub(match_mask, ch2, mask1); __ orr(ch2, ch2, mask2); - __ mv(trailing_zero, -1); // all bits set + __ mv(trailing_zeros, -1); // all bits set __ j(L_SMALL_PROCEED); __ align(OptoLoopAlignment); @@ -2625,44 +2620,42 @@ class StubGenerator: public StubCodeGenerator { __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); __ neg(haystack_len, haystack_len); if (needle_isL != haystack_isL) { - __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); + __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); } __ xorr(ch2, first, ch2); __ sub(match_mask, ch2, mask1); __ orr(ch2, ch2, mask2); - __ mv(trailing_zero, -1); // all bits set + __ mv(trailing_zeros, -1); // all bits set __ bind(L_SMALL_PROCEED); - __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. + __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. __ notr(ch2, ch2); __ andr(match_mask, match_mask, ch2); - __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check + __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check __ beqz(match_mask, NOMATCH); __ bind(L_SMALL_HAS_ZERO_LOOP); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros - __ addi(trailing_zero, trailing_zero, haystack_isL ? 
7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); __ mv(ch2, wordSize / haystack_chr_size); __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); - __ mv(trailing_zero, wordSize / haystack_chr_size); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ mv(trailing_zeros, wordSize / haystack_chr_size); __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); __ bind(L_SMALL_CMP_LOOP); - __ slli(first, trailing_zero, needle_chr_shift); - __ add(first, needle, first); - __ slli(ch2, trailing_zero, haystack_chr_shift); - __ add(ch2, haystack, ch2); + __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); + __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); - __ add(trailing_zero, trailing_zero, 1); - __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); + __ add(trailing_zeros, trailing_zeros, 1); + __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); __ beq(first, ch2, L_SMALL_CMP_LOOP); __ bind(L_SMALL_CMP_LOOP_NOMATCH); __ beqz(match_mask, NOMATCH); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); __ add(result, result, 1); __ add(haystack, haystack, haystack_chr_size); __ j(L_SMALL_HAS_ZERO_LOOP); @@ -2674,14 +2667,14 @@ class StubGenerator: public StubCodeGenerator { __ align(OptoLoopAlignment); __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); __ j(DONE); __ align(OptoLoopAlignment); __ bind(L_HAS_ZERO); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) __ sub(result, result, 1); // array index from 0, so result -= 1 @@ -2691,28 +2684,26 @@ class StubGenerator: public StubCodeGenerator { __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); // load next 8 bytes from haystack, and increase result index - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); __ add(result, result, 1); - __ mv(trailing_zero, wordSize / haystack_chr_size); + __ mv(trailing_zeros, wordSize / haystack_chr_size); __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); // compare one char __ bind(L_CMP_LOOP); - __ slli(needle_len, trailing_zero, needle_chr_shift); - __ add(needle_len, needle, needle_len); + __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); needle_isL ? 
__ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); - __ slli(ch2, trailing_zero, haystack_chr_shift); - __ add(ch2, haystack, ch2); + __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); - __ add(trailing_zero, trailing_zero, 1); // next char index + __ add(trailing_zeros, trailing_zeros, 1); // next char index __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); - __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); + __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); __ beq(needle_len, ch2, L_CMP_LOOP); __ bind(L_CMP_LOOP_NOMATCH); __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); __ add(haystack, haystack, haystack_chr_size); __ j(L_HAS_ZERO_LOOP); @@ -2723,7 +2714,7 @@ class StubGenerator: public StubCodeGenerator { __ align(OptoLoopAlignment); __ bind(L_CMP_LOOP_LAST_CMP2); - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); __ add(result, result, 1); __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); __ j(DONE); @@ -2760,11 +2751,778 @@ class StubGenerator: public StubCodeGenerator { void generate_string_indexof_stubs() { - StubRoutines::riscv64::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); - StubRoutines::riscv64::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); - StubRoutines::riscv64::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); + StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); + StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + +#ifdef COMPILER2 + address generate_mulAdd() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + + address entry = __ pc(); + + const Register out = x10; + const Register in = x11; + const Register offset = x12; + const Register len = x13; + const Register k = x14; + const Register tmp = x28; + + BLOCK_COMMENT("Entry:"); + __ enter(); + __ mul_add(out, in, offset, len, k, tmp); + __ leave(); + __ ret(); + + return entry; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ + address generate_multiplyToLen() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + address entry = __ pc(); + + const Register x = 
x10; + const Register xlen = x11; + const Register y = x12; + const Register ylen = x13; + const Register z = x14; + const Register zlen = x15; + + const Register tmp1 = x16; + const Register tmp2 = x17; + const Register tmp3 = x7; + const Register tmp4 = x28; + const Register tmp5 = x29; + const Register tmp6 = x30; + const Register tmp7 = x31; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + + return entry; } + address generate_squareToLen() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); + address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; + const Register z = x12; + const Register zlen = x13; + const Register y = x14; // == x + const Register ylen = x15; // == xlen + + const Register tmp1 = x16; + const Register tmp2 = x17; + const Register tmp3 = x7; + const Register tmp4 = x28; + const Register tmp5 = x29; + const Register tmp6 = x30; + const Register tmp7 = x31; + + BLOCK_COMMENT("Entry:"); + __ enter(); + __ mv(y, x); + __ mv(ylen, xlen); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); + __ ret(); + + return entry; + } +#endif + +#ifdef COMPILER2 + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, + Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; + + RegSet _toSave; + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = c_rarg0; + Pa_base = reg; // Argument registers + if (squaring) { + Pb_base = Pa_base; + } else { + Pb_base = ++reg; + } + Pn_base = ++reg; + 
Rlen= ++reg; + inv = ++reg; + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. + Pb = ++reg; + Pm = ++reg; + Pn = ++reg; + + tmp0 = ++reg; // Three registers which form a + tmp1 = ++reg; // triple-precision accumulator. + tmp2 = ++reg; + + Ri = x6; // Inner and outer loop indexes. + Rj = x7; + + Rhi_ab = x28; // Product registers: low and high parts + Rlo_ab = x29; // of a*b and m*n. + Rhi_mn = x30; + Rlo_mn = x31; + + // x18 and up are callee-saved. + _toSave = RegSet::range(x18, reg) + Pm_base; + } + + private: + void save_regs() { + push_reg(_toSave, sp); + } + + void restore_regs() { + pop_reg(_toSave, sp); + } + + template <typename T> + void unroll_2(Register count, T block) { + Label loop, end, odd; + beqz(count, end); + andi(t0, count, 0x1); + bnez(t0, odd); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + addi(count, count, -2); + bgtz(count, loop); + bind(end); + } + + template <typename T> + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + beqz(count, end); + andi(tmp, count, 0x1); + bnez(tmp, odd); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + addi(count, count, -2); + bgtz(count, loop); + bind(end); + } + + void pre1(RegisterOrConstant i) { + block_comment("pre1"); + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + if (i.is_register()) { + slli(t0, i.as_register(), LogBytesPerWord); + } else { + mv(t0, i.as_constant()); + slli(t0, t0, LogBytesPerWord); + } + + mv(Pa, Pa_base); + add(Pb, Pb_base, t0); + mv(Pm, Pm_base); + add(Pn, Pn_base, t0); + + ld(Ra, Address(Pa)); + ld(Rb, Address(Pb)); + ld(Rm, Address(Pm)); + ld(Rn, Address(Pn)); + + // Zero the m*n result.
+ mv(Rhi_mn, zr); + mv(Rlo_mn, zr); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit. + void step() { + block_comment("step"); + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + addi(Pa, Pa, wordSize); + ld(Ra, Address(Pa)); + addi(Pb, Pb, -wordSize); + ld(Rb, Address(Pb)); + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the + // previous iteration. + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // Rm = *++Pm; + // Rn = *--Pn; + mulhu(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + addi(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + addi(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + + // *Pm = Rm = tmp0 * inv; + mul(Rm, tmp0, inv); + sd(Rm, Address(Pm)); + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + mulhu(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, tmp0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); + stop("broken Montgomery multiply"); + bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff + // tmp0 != 0. 
So, rather than do a mul and a cad we just set + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // cad(zr, tmp0, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + sub(Rj, i, len); + // Rj == i-len + + // Ra as temp register + slli(Ra, Rj, LogBytesPerWord); + add(Pa, Pa_base, Ra); + add(Pm, Pm_base, Ra); + slli(Ra, len, LogBytesPerWord); + add(Pb, Pb_base, Ra); + add(Pn, Pn_base, Ra); + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + add(Pa, Pa, wordSize); + ld(Ra, Address(Pa)); + add(Pb, Pb, -wordSize); + ld(Rb, Address(Pb)); + add(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + add(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + + mv(Rhi_mn, zr); + mv(Rlo_mn, zr); + } + + void post2(Register i, Register len) { + block_comment("post2"); + sub(Rj, i, len); + + cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = tmp0; + // Rj as temp register + slli(Rj, Rj, LogBytesPerWord); + add(Rj, Pm_base, Rj); + sd(tmp0, Address(Rj)); + + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + // A carry in tmp0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry.
+ void normalize(Register len) { + block_comment("normalize"); + // while (tmp0) + // tmp0 = sub(Pm_base, Pn_base, tmp0, len); + Label loop, post, again; + Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now + beqz(tmp0, post); { + bind(again); { + mv(i, zr); + mv(cnt, len); + slli(Rn, i, LogBytesPerWord); + add(Rm, Pm_base, Rn); + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); + li(t0, 1); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + notr(Rn, Rn); + add(Rm, Rm, t0); + add(Rm, Rm, Rn); + sltu(t0, Rm, Rn); + slli(Rn, i, LogBytesPerWord); // Rn as temp register + add(Rn, Pm_base, Rn); + sd(Rm, Address(Rn)); + add(i, i, 1); + slli(Rn, i, LogBytesPerWord); + add(Rm, Pm_base, Rn); + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); + sub(cnt, cnt, 1); + } bnez(cnt, loop); + addi(tmp0, tmp0, -1); + add(tmp0, tmp0, t0); + } bnez(tmp0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. + // Increments d to end of copied memory + // Destroys tmp1, tmp2 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + + slli(tmp1, len, LogBytesPerWord); + add(s, s, tmp1); + mv(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli(tmp1, len, LogBytesPerWord); + sub(s, d, tmp1); + } + // [63...0] -> [31...0][63...32] + void reverse1(Register d, Register s, Register tmp) { + addi(s, s, -wordSize); + ld(tmp, Address(s)); + ror_imm(tmp, tmp, 32, t0); + sd(tmp, Address(d)); + addi(d, d, wordSize); + } + + void step_squaring() { + // An extra ACC + step(); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } + + void last_squaring(Register i) { + Label dont; + // if ((i & 1) == 0) { + andi(t0, i, 0x1); + bnez(t0, dont); { + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; 
+ mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } bind(dont); + } + + void extra_step_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // Rm = *++Pm; + // Rn = *--Pn; + mulhu(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + addi(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + addi(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + } + + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + + // *Pm = Rm = tmp0 * inv; + mul(Rm, tmp0, inv); + sd(Rm, Address(Pm)); + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + mulhu(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, tmp0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff + // tmp0 != 0. So, rather than do a mul and a cad we just set + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // cad(zr, tmp, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + // use t0 as carry + void acc(Register Rhi, Register Rlo, + Register tmp0, Register tmp1, Register tmp2) { + cad(tmp0, tmp0, Rlo, t0); + cadc(tmp1, tmp1, Rhi, t0); + adc(tmp2, tmp2, zr, t0); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
+ * + * Arguments: + * + * Inputs for multiplication: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements b + * c_rarg2 - int array elements n (the modulus) + * c_rarg3 - int length + * c_rarg4 - int inv + * c_rarg5 - int array elements m (the result) + * + * Inputs for squaring: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + beqz(Rlen, nothing); + + enter(); + + // Make room. + li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); + andi(sp, Ra, -2 * wordSize); + + srliw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, Ri, Rj); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, Ri, Rj); + reverse(Ra, Pn_base, Rlen, Ri, Rj); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. 
+ save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ld(Rn, Address(Pn_base)); + mul(Rlo_mn, Rn, inv); + li(t0, -1); + Label ok; + beq(Rlo_mn, t0, ok); + stop("broken inverse in Montgomery multiply"); + bind(ok); + } +#endif + + mv(Pm_base, Ra); + + mv(tmp0, zr); + mv(tmp1, zr); + mv(tmp2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mv(Ri, zr); { + Label loop, end; + bge(Ri, Rlen, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + mv(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post1(); + addw(Ri, Ri, 1); + blt(Ri, Rlen, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; + slli(t0, Rlen, 1); + bge(Ri, t0, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + slliw(Rj, Rlen, 1); + subw(Rj, Rj, Ri); + subw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addw(Ri, Ri, 1); + slli(t0, Rlen, 1); + blt(Ri, t0, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, Ri, Rj); + + leave(); + bind(nothing); + ret(); + + return entry; + } + + /** + * Fast Montgomery squaring. + * Arguments: + * + * Inputs: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_square() { + Label argh; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + enter(); + + // Make room.
+ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); + andi(sp, Ra, -2 * wordSize); + + srliw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, Ri, Rj); + reverse(Ra, Pn_base, Rlen, Ri, Rj); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + + mv(Pm_base, Ra); + + mv(tmp0, zr); + mv(tmp1, zr); + mv(tmp2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mv(Ri, zr); { + Label loop, end; + bind(loop); + bge(Ri, Rlen, end); + + pre1(Ri); + + block_comment("for (j = (i+1)/2; j; j--) {"); { + addi(Rj, Ri, 1); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = i/2; j; j--) {"); { + srliw(Rj, Ri, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post1_squaring(); + addi(Ri, Ri, 1); + blt(Ri, Rlen, loop); + + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; + bind(loop); + slli(t0, Rlen, 1); + bge(Ri, t0, end); + + pre2(Ri, Rlen); + + block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { + slli(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + sub(Rj, Rj, 1); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = (2*len-i)/2; j; j--) {"); { + slli(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addi(Ri, Ri, 1); + slli(t0, Rlen, 1); + blt(Ri, t0, loop); + + bind(end); + block_comment("} // i"); + } + + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); 
// Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, Ri, Rj); + + leave(); + ret(); + + return entry; + } + }; +#endif // COMPILER2 + // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception // oop and initiates normal exception dispatching in this @@ -2792,7 +3550,7 @@ class StubGenerator: public StubCodeGenerator { // Note that we only have to preserve callee-saved registers since // the compilers are responsible for supplying a continuation point // if they expect all registers to be preserved. - // n.b. riscv64 asserts that frame::arg_reg_save_area_bytes == 0 + // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0 assert_cond(runtime_entry != NULL); enum layout { fp_off = 0, @@ -2817,12 +3575,12 @@ class StubGenerator: public StubCodeGenerator { // thread-local storage and also sets up last_Java_sp slightly // differently than the real call_VM - __ enter(); // Save FP and LR before call + __ enter(); // Save FP and RA before call assert(is_even(framesize / 2), "sp not 16-byte aligned"); - // lr and fp are already in place - __ addi(sp, fp, 0 - (((unsigned)framesize - 4) << LogBytesPerInt)); // prolog + // ra and fp are already in place + __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog int frame_complete = __ pc() - start; @@ -2851,7 +3609,6 @@ class StubGenerator: public StubCodeGenerator { oop_maps->add_gc_map(the_pc - start, map); __ reset_last_Java_frame(true); - __ ifence(); __ leave(); @@ -2936,11 +3693,37 @@ class StubGenerator: public StubCodeGenerator { // arraycopy stubs used by compilers generate_arraycopy_stubs(); +#ifdef COMPILER2 + if (UseMulAddIntrinsic) { + StubRoutines::_mulAdd = generate_mulAdd(); + } + + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + + if (UseSquareToLenIntrinsic) { + StubRoutines::_squareToLen = generate_squareToLen(); + 
} + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } +#endif + generate_compare_long_strings(); generate_string_indexof_stubs(); - StubRoutines::riscv64::set_completed(); + StubRoutines::riscv::set_completed(); } public: diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp index 53edd653aa9..9202d9ec4b0 100644 --- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,26 +34,24 @@ // Implementation of the platform-specific part of StubRoutines - for // a description of how to extend it, see the stubRoutines.hpp file. 
-address StubRoutines::riscv64::_get_previous_sp_entry = NULL; +address StubRoutines::riscv::_get_previous_sp_entry = NULL; -address StubRoutines::riscv64::_f2i_fixup = NULL; -address StubRoutines::riscv64::_f2l_fixup = NULL; -address StubRoutines::riscv64::_d2i_fixup = NULL; -address StubRoutines::riscv64::_d2l_fixup = NULL; -address StubRoutines::riscv64::_float_sign_mask = NULL; -address StubRoutines::riscv64::_float_sign_flip = NULL; -address StubRoutines::riscv64::_double_sign_mask = NULL; -address StubRoutines::riscv64::_double_sign_flip = NULL; -address StubRoutines::riscv64::_zero_blocks = NULL; -address StubRoutines::riscv64::_has_negatives = NULL; -address StubRoutines::riscv64::_has_negatives_long = NULL; -address StubRoutines::riscv64::_compare_long_string_LL = NULL; -address StubRoutines::riscv64::_compare_long_string_UU = NULL; -address StubRoutines::riscv64::_compare_long_string_LU = NULL; -address StubRoutines::riscv64::_compare_long_string_UL = NULL; -address StubRoutines::riscv64::_string_indexof_linear_ll = NULL; -address StubRoutines::riscv64::_string_indexof_linear_uu = NULL; -address StubRoutines::riscv64::_string_indexof_linear_ul = NULL; -address StubRoutines::riscv64::_large_byte_array_inflate = NULL; -address StubRoutines::riscv64::_method_entry_barrier = NULL; -bool StubRoutines::riscv64::_completed = false; +address StubRoutines::riscv::_f2i_fixup = NULL; +address StubRoutines::riscv::_f2l_fixup = NULL; +address StubRoutines::riscv::_d2i_fixup = NULL; +address StubRoutines::riscv::_d2l_fixup = NULL; +address StubRoutines::riscv::_float_sign_mask = NULL; +address StubRoutines::riscv::_float_sign_flip = NULL; +address StubRoutines::riscv::_double_sign_mask = NULL; +address StubRoutines::riscv::_double_sign_flip = NULL; +address StubRoutines::riscv::_zero_blocks = NULL; +address StubRoutines::riscv::_compare_long_string_LL = NULL; +address StubRoutines::riscv::_compare_long_string_UU = NULL; +address 
StubRoutines::riscv::_compare_long_string_LU = NULL; +address StubRoutines::riscv::_compare_long_string_UL = NULL; +address StubRoutines::riscv::_string_indexof_linear_ll = NULL; +address StubRoutines::riscv::_string_indexof_linear_uu = NULL; +address StubRoutines::riscv::_string_indexof_linear_ul = NULL; +address StubRoutines::riscv::_large_byte_array_inflate = NULL; + +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp index 37d3523adb5..0c9445e18a7 100644 --- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ // definition. See stubRoutines.hpp for a description on how to // extend it. 
-static bool returns_to_call_stub(address return_pc) { +static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } @@ -40,7 +40,7 @@ enum platform_dependent_constants { code_size2 = 28000 // simply increase if too small (assembler will crash if too small) }; -class riscv64 { +class riscv { friend class StubGenerator; private: @@ -58,8 +58,6 @@ class riscv64 { static address _zero_blocks; - static address _has_negatives; - static address _has_negatives_long; static address _compare_long_string_LL; static address _compare_long_string_LU; static address _compare_long_string_UL; @@ -69,54 +67,43 @@ class riscv64 { static address _string_indexof_linear_ul; static address _large_byte_array_inflate; - static address _method_entry_barrier; - static bool _completed; public: - static address get_previous_sp_entry() - { + static address get_previous_sp_entry() { return _get_previous_sp_entry; } - static address f2i_fixup() - { + static address f2i_fixup() { return _f2i_fixup; } - static address f2l_fixup() - { + static address f2l_fixup() { return _f2l_fixup; } - static address d2i_fixup() - { + static address d2i_fixup() { return _d2i_fixup; } - static address d2l_fixup() - { + static address d2l_fixup() { return _d2l_fixup; } - static address float_sign_mask() - { + static address float_sign_mask() { return _float_sign_mask; } - static address float_sign_flip() - { + static address float_sign_flip() { return _float_sign_flip; } - static address double_sign_mask() - { + static address double_sign_mask() { return _double_sign_mask; } - static address double_sign_flip() - { + static address double_sign_flip() { return _double_sign_flip; } @@ -124,14 +111,6 @@ class riscv64 { return _zero_blocks; } - static address has_negatives() { - return _has_negatives; - } - - static address has_negatives_long() { - return _has_negatives_long; - } - static address compare_long_string_LL() { return _compare_long_string_LL; } @@ -164,10 +143,6 @@ 
class riscv64 { return _large_byte_array_inflate; } - static address method_entry_barrier() { - return _method_entry_barrier; - } - static bool complete() { return _completed; } diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index 33f812e67ed..e639fa7e12f 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -80,7 +80,7 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { __ addi(sp, c_rarg3, -18 * wordSize); __ addi(sp, sp, -2 * wordSize); - __ sd(lr, Address(sp, 0)); + __ sd(ra, Address(sp, 0)); __ call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -101,8 +101,8 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { // bcp (NULL) // ... 
- // Restore LR - __ ld(lr, Address(sp, 0)); + // Restore ra + __ ld(ra, Address(sp, 0)); __ addi(sp, sp , 2 * wordSize); // Do FP first so we can use c_rarg3 as temp @@ -159,11 +159,11 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M // stack: // [ arg ] <-- esp // [ arg ] - // retaddr in lr + // retaddr in ra address fn = NULL; address entry_point = NULL; - Register continuation = lr; + Register continuation = ra; switch (kind) { case Interpreter::java_lang_math_abs: entry_point = __ pc(); @@ -181,7 +181,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dsin() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); @@ -195,7 +195,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dcos() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); @@ -209,7 +209,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dtan() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); @@ -223,7 +223,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dlog() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); @@ -237,7 +237,7 @@ address 
TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dlog10() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); @@ -251,7 +251,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dexp() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); @@ -263,7 +263,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M break; case Interpreter::java_lang_math_pow : entry_point = __ pc(); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); __ fld(f11, Address(esp)); @@ -366,7 +366,7 @@ address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { // setup parameters // convention: expect aberrant index in register x11 - __ zero_ext(c_rarg2, x11, 32); + __ zero_extend(c_rarg2, x11, 32); // convention: expect array in register x13 __ mv(c_rarg1, x13); __ call_VM(noreg, @@ -455,8 +455,7 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); - __ slli(t0, x11, 3); - __ add(esp, esp, t0); + __ shadd(esp, x11, esp, t0, 3); // Restore machine SP __ ld(t0, Address(xmethod, Method::const_offset())); @@ -557,9 +556,10 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, // // xmethod: method // -void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, - Label* profile_method, - Label* profile_method_continue) { +void 
TemplateInterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { Label done; // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. if (TieredCompilation) { @@ -579,19 +579,19 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, __ bind(no_mdo); // Increment counter in MethodCounters const Address invocation_counter(t1, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); __ get_method_counters(xmethod, t1, done); const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); __ bind(done); } else { // not TieredCompilation const Address backedge_counter(t1, - MethodCounters::backedge_counter_offset() + - InvocationCounter::counter_offset()); + MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset()); const Address invocation_counter(t1, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); __ get_method_counters(xmethod, t1, done); @@ -627,7 +627,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, __ ld(t1, Address(xmethod, Method::method_counters_offset())); __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); __ bltu(x10, t1, done); - __ j(*overflow); // offset is too large so we have to use j instead of bgeu here + __ j(*overflow); } __ bind(done); } @@ -685,8 +685,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { // locals + overhead, in bytes __ mv(x10, overhead_size); - __ slli(t0, x13, Interpreter::logStackElementSize); - __ add(x10, x10, t0); // 2 slots per 
parameter. + __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); __ ld(t0, stack_limit); @@ -780,7 +779,7 @@ void TemplateInterpreterGenerator::lock_method() { // interpreted methods and for native methods hence the shared code. // // Args: -// lr: return address +// ra: return address // xmethod: Method* // xlocals: pointer to locals // xcpool: cp cache @@ -815,9 +814,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); // Get mirror and store it in the frame as GC root for this Method* - __ load_mirror(t2, xmethod); - __ sd(zr, Address(sp, 5 * wordSize)); - __ sd(t2, Address(sp, 4 * wordSize)); +#if INCLUDE_SHENANDOAHGC + if (UseShenandoahGC) { + __ load_mirror(x28, xmethod); + __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(x28, Address(sp, 4 * wordSize)); + } else +#endif + { + __ load_mirror(t2, xmethod); + __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(t2, Address(sp, 4 * wordSize)); + } __ ld(xcpool, Address(xmethod, Method::const_offset())); __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); @@ -825,9 +833,9 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(xcpool, Address(sp, 3 * wordSize)); __ sd(xlocals, Address(sp, 2 * wordSize)); - __ sd(lr, Address(sp, 11 * wordSize)); + __ sd(ra, Address(sp, 11 * wordSize)); __ sd(fp, Address(sp, 10 * wordSize)); - __ la(fp, Address(sp, 10 * wordSize)); + __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp // set sender sp // leave last_sp as null @@ -882,7 +890,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { // xmethod: Method* // x30: senderSP must preserve for slow path, set SP to it on fast path - // LR is live. It must be saved around calls. + // ra is live. It must be saved around calls. 
address entry = __ pc(); @@ -948,14 +956,12 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // Bang each page in the shadow zone. We can't assume it's been done for // an interpreter frame with greater than a page of locals, so each page // needs to be checked. Only true for non-native. - if (UseStackBanging) { - const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); - const int start_page = native_call ? n_shadow_pages : 1; - const int page_size = os::vm_page_size(); - for (int pages = start_page; pages <= n_shadow_pages; pages++) { - __ sub(t1, sp, pages * page_size); - __ sd(zr, Address(t1)); - } + const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); + const int start_page = native_call ? n_shadow_pages : 1; + const int page_size = os::vm_page_size(); + for (int pages = start_page; pages <= n_shadow_pages ; pages++) { + __ sub(t0, sp, pages * page_size); + __ sd(zr, Address(t0)); } } @@ -964,7 +970,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // native method than the typical interpreter frame setup. address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // determine code generation flags - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // x11: Method* // x30: sender sp @@ -991,8 +997,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // for natives the size of locals is zero // compute beginning of parameters (xlocals) - __ slli(xlocals, x12, 3); - __ add(xlocals, esp, xlocals); + __ shadd(xlocals, x12, esp, xlocals, 3); __ addi(xlocals, xlocals, -wordSize); // Pull SP back to minimum size: this avoids holes in the stack @@ -1160,7 +1165,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // Call the native method. 
__ jalr(x28); __ bind(native_return); - __ ifence(); __ get_method(xmethod); // result potentially in x10 or f10 @@ -1182,18 +1186,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ mv(t0, _thread_in_native_trans); __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); - if (os::is_MP()) { - if (UseMembar) { - // Force this write out before the read below - __ membar(MacroAssembler::AnyAny); - } else { - // Write serialization page so VM thread can do a pseudo remote membar. - // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. - __ serialize_memory(xthread, t0, t1); - } - } + // Force this write out before the read below + __ membar(MacroAssembler::AnyAny); // check for safepoint operation in progress and/or pending suspend requests { @@ -1211,7 +1205,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ mv(c_rarg0, xthread); __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); __ jalr(t1); - __ ifence(); __ get_method(xmethod); __ reinit_heapbase(); __ bind(Continue); @@ -1387,8 +1380,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { generate_stack_overflow_check(); // compute beginning of parameters (xlocals) - __ slli(t1, x12, 3); - __ add(xlocals, esp, t1); + __ shadd(xlocals, x12, esp, t1, 3); __ add(xlocals, xlocals, -wordSize); // Make room for additional locals @@ -1499,7 +1491,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { __ set_method_data_pointer_for_bcp(); // don't think we need this __ get_method(x11); - __ jal(profile_method_continue); + __ j(profile_method_continue); } // Handle overflow of counter and compile method __ bind(invocation_counter_overflow); @@ -1706,23 +1698,23 @@ void TemplateInterpreterGenerator::generate_throw_exception() { 
// following registers set up: // // x10: exception - // lr: return address/pc that threw exception + // ra: return address/pc that threw exception // sp: expression stack of caller // fp: fp of caller - // FIXME: There's no point saving LR here because VM calls don't trash it + // FIXME: There's no point saving ra here because VM calls don't trash it __ sub(sp, sp, 2 * wordSize); __ sd(x10, Address(sp, 0)); // save exception - __ sd(lr, Address(sp, wordSize)); // save return address + __ sd(ra, Address(sp, wordSize)); // save return address __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), - xthread, lr); + xthread, ra); __ mv(x11, x10); // save exception handler __ ld(x10, Address(sp, 0)); // restore exception - __ ld(lr, Address(sp, wordSize)); // restore return address + __ ld(ra, Address(sp, wordSize)); // restore return address __ add(sp, sp, 2 * wordSize); // We might be returning to a deopt handler that expects x13 to // contain the exception pc - __ mv(x13, lr); + __ mv(x13, ra); // Note that an "issuing PC" is actually the next PC after the call __ jr(x11); // jump to exception // handler of caller @@ -1789,14 +1781,14 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address TemplateInterpreterGenerator::generate_trace_code(TosState state) { address entry = __ pc(); - __ push_reg(lr); + __ push_reg(ra); __ push(state); __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); __ mv(c_rarg2, x10); // Pass itos __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); __ pop(state); - __ pop_reg(lr); + __ pop_reg(ra); __ ret(); // return from result handler return entry; diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index bdfd540c878..84b1afc7dc6 
100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -49,7 +49,7 @@ // Platform-dependent initialization void TemplateTable::pd_initialize() { - // No riscv64 specific initialization + // No RISC-V specific initialization } // Address computation: local variables @@ -76,16 +76,14 @@ static inline Address aaddress(int n) { static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { assert_cond(_masm != NULL); - _masm->slli(temp, r, 3); - _masm->add(temp, xlocals, temp); + _masm->shadd(temp, r, xlocals, temp, 3); return Address(temp, 0); } static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { assert_cond(_masm != NULL); - _masm->slli(temp, r, 3); - _masm->add(temp, xlocals, temp); + _masm->shadd(temp, r, xlocals, temp, 3); return Address(temp, Interpreter::local_offset_in_bytes(1));; } @@ -301,7 +299,7 @@ void TemplateTable::sipush() { transition(vtos, itos); __ load_unsigned_short(x10, at_bcp(1)); - __ grevw(x10, x10); + __ revb_w_w(x10, x10); __ sraiw(x10, x10, 16); } @@ -352,8 +350,7 @@ void TemplateTable::ldc(bool wide) __ bne(x13, t1, notFloat); // ftos - __ slli(x11, x11, 3); - __ add(x11, x12, x11); + __ shadd(x11, x11, x12, x11, 3); __ flw(f10, Address(x11, base_offset)); __ push_f(f10); __ j(Done); @@ -364,8 +361,7 @@ void TemplateTable::ldc(bool wide) __ bne(x13, t1, notInt); // itos - __ slli(x11, x11, 3); - __ add(x11, x12, x11); + __ shadd(x11, x11, x12, x11, 3); __ lw(x10, 
Address(x11, base_offset)); __ push_i(x10); __ j(Done); @@ -413,7 +409,7 @@ void TemplateTable::fast_aldc(bool wide) int32_t offset = 0; __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); __ ld(tmp, Address(rarg, offset)); - __ oop_bne(result, tmp, notNull); + __ bne(result, tmp, notNull); __ mv(result, zr); // NULL object reference __ bind(notNull); } @@ -441,8 +437,7 @@ void TemplateTable::ldc2_w() __ bne(x12, t1, notDouble); // dtos - __ slli(x12, x10, 3); - __ add(x12, x11, x12); + __ shadd(x12, x10, x11, x12, 3); __ fld(f10, Address(x12, base_offset)); __ push_d(f10); __ j(Done); @@ -452,8 +447,7 @@ void TemplateTable::ldc2_w() __ bne(x12, t1, notLong); // ltos - __ slli(x10, x10, 3); - __ add(x10, x11, x10); + __ shadd(x10, x10, x11, x10, 3); __ ld(x10, Address(x10, base_offset)); __ push_l(x10); __ j(Done); @@ -461,7 +455,6 @@ void TemplateTable::ldc2_w() __ bind(notLong); condy_helper(Done); __ bind(Done); - } void TemplateTable::condy_helper(Label& Done) @@ -487,8 +480,8 @@ void TemplateTable::condy_helper(Label& Done) __ add(off, obj, off); const Address field(off, 0); // base + R---->base + offset - __ slli(flags, flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); - __ srli(flags, flags, registerSize - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 + __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 switch (bytecode()) { case Bytecodes::_ldc: // fall through @@ -682,7 +675,7 @@ void TemplateTable::aload() void TemplateTable::locals_index_wide(Register reg) { __ lhu(reg, at_bcp(2)); - __ grevhu(reg, reg); // reverse bytes in half-word and zero-extend + __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend __ neg(reg, reg); } @@ -696,7 +689,7 @@ void 
TemplateTable::wide_lload() { transition(vtos, ltos); __ lhu(x11, at_bcp(2)); - __ grevhu(x11, x11); // reverse bytes in half-word and zero-extend + __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend __ slli(x11, x11, LogBytesPerWord); __ sub(x11, xlocals, x11); __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); @@ -713,7 +706,7 @@ void TemplateTable::wide_dload() { transition(vtos, dtos); __ lhu(x11, at_bcp(2)); - __ grevhu(x11, x11); // reverse bytes in half-word and zero-extend + __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend __ slli(x11, x11, LogBytesPerWord); __ sub(x11, xlocals, x11); __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); @@ -757,8 +750,7 @@ void TemplateTable::iaload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); - __ slli(t0, x11, 2); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 2); __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); __ addw(x10, x10, zr); // signed extended } @@ -772,8 +764,7 @@ void TemplateTable::laload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); - __ slli(t0, x11, 3); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 3); __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -786,8 +777,7 @@ void TemplateTable::faload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); - __ slli(t0, x11, 2); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 2); __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -800,8 +790,7 @@ void TemplateTable::daload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); - __ slli(t0, x11, 3); - __ add(x10, t0, 
x10); + __ shadd(x10, x11, x10, t0, 3); __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -814,8 +803,7 @@ void TemplateTable::aaload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - __ slli(t0, x11, LogBytesPerHeapOop); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); do_oop_load(_masm, Address(x10), x10, @@ -831,8 +819,7 @@ void TemplateTable::baload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); - __ slli(t0, x11, 0); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 0); __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -845,8 +832,7 @@ void TemplateTable::caload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); - __ slli(t0, x11, 1); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -863,8 +849,7 @@ void TemplateTable::fast_icaload() // x11: index index_check(x10, x11); // leaves index in x11, kills t0 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 - __ slli(t0, x11, 1); - __ add(x10, x10, t0); + __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -877,8 +862,7 @@ void TemplateTable::saload() // x11: index index_check(x10, x11); // leaves index in x11, kills t0 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); - __ slli(t0, x11, 1); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -1065,8 +1049,7 @@ void TemplateTable::iastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ 
add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); - __ slli(t0, x11, 2); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 2); __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); } @@ -1079,8 +1062,7 @@ void TemplateTable::lastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); - __ slli(t0, x11, 3); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 3); __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); } @@ -1093,8 +1075,7 @@ void TemplateTable::fastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); - __ slli(t0, x11, 2); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 2); __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); } @@ -1107,8 +1088,7 @@ void TemplateTable::dastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); - __ slli(t0, x11, 3); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 3); __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); } @@ -1122,8 +1102,7 @@ void TemplateTable::aastore() { index_check(x13, x12); // kills x11 __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - __ slli(x14, x14, LogBytesPerHeapOop); - __ add(x14, x13, x14); + __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); Address element_address(x14, 0); @@ -1204,8 +1183,7 @@ void TemplateTable::castore() // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); - __ slli(t0, x11, 1); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 1); __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); } @@ -1573,8 +1551,8 @@ void 
TemplateTable::wide_iinc() { transition(vtos, vtos); __ lwu(x11, at_bcp(2)); // get constant and index - __ grev16wu(x11, x11); // reverse bytes in half-word (32bit) and zero-extend - __ zero_ext(x12, x11, 48); + __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend + __ zero_extend(x12, x11, 16); __ neg(x12, x12); __ slli(x11, x11, 32); __ srai(x11, x11, 48); @@ -1633,7 +1611,7 @@ void TemplateTable::convert() // Conversion switch (bytecode()) { case Bytecodes::_i2l: - __ sign_ext(x10, x10, registerSize - 32); + __ sign_extend(x10, x10, 32); break; case Bytecodes::_i2f: __ fcvt_s_w(f10, x10); @@ -1642,13 +1620,13 @@ void TemplateTable::convert() __ fcvt_d_w(f10, x10); break; case Bytecodes::_i2b: - __ sign_ext(x10, x10, registerSize - 8); + __ sign_extend(x10, x10, 8); break; case Bytecodes::_i2c: - __ zero_ext(x10, x10, registerSize - 16); + __ zero_extend(x10, x10, 16); break; case Bytecodes::_i2s: - __ sign_ext(x10, x10, registerSize - 16); + __ sign_extend(x10, x10, 16); break; case Bytecodes::_l2i: __ addw(x10, x10, zr); @@ -1733,10 +1711,10 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) // load branch displacement if (!is_wide) { __ lhu(x12, at_bcp(1)); - __ grevh(x12, x12); // reverse bytes in half-word and sign-extend + __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend } else { __ lwu(x12, at_bcp(1)); - __ grevw(x12, x12); // reverse bytes in word and sign-extend + __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend } // Handle all the JSR stuff here, then exit. 
@@ -1800,7 +1778,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ beqz(x11, no_mdo); // Increment the MDO backedge counter const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + - in_bytes(InvocationCounter::counter_offset())); + in_bytes(InvocationCounter::counter_offset())); const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, x10, t0, false, @@ -1817,9 +1795,9 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) } else { // not TieredCompilation // increment counter __ ld(t1, Address(xmethod, Method::method_counters_offset())); - __ lwu(x10, Address(t1, be_offset)); // load backedge counter + __ lwu(x10, Address(t1, be_offset)); // load backedge counter __ addw(t0, x10, InvocationCounter::count_increment); // increment counter - __ sw(t0, Address(t1, be_offset)); // store counter + __ sw(t0, Address(t1, be_offset)); // store counter __ lwu(x10, Address(t1, inv_offset)); // load invocation counter __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits @@ -1860,7 +1838,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) } __ bind(dispatch); } - // Pre-load the next target bytecode into t0 __ load_unsigned_byte(t0, Address(xbcp, 0)); @@ -1870,7 +1847,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ dispatch_only(vtos, /*generate_poll*/true); if (UseLoopCounter) { - if (ProfileInterpreter) { + if (ProfileInterpreter && !TieredCompilation) { // Out-of-line code to allocate method data oop. 
__ bind(profile_method); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); @@ -1916,7 +1893,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) // remove activation // get sender esp __ ld(esp, - Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); + Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // remove frame anchor __ leave(); // Ensure compiled code always sees stack at proper alignment @@ -1927,7 +1904,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ jr(t0); } } - } void TemplateTable::if_0cmp(Condition cc) @@ -2023,9 +1999,9 @@ void TemplateTable::if_acmp(Condition cc) __ pop_ptr(x11); if (cc == equal) { - __ oop_bne(x11, x10, not_taken); + __ bne(x11, x10, not_taken); } else if (cc == not_equal) { - __ oop_beq(x11, x10, not_taken); + __ beq(x11, x10, not_taken); } branch(false, false); __ bind(not_taken); @@ -2069,20 +2045,19 @@ void TemplateTable::tableswitch() { // load lo & hi __ lwu(x12, Address(x11, BytesPerInt)); __ lwu(x13, Address(x11, 2 * BytesPerInt)); - __ grevw(x12, x12); // reverse bytes in word (32bit) and sign-extend - __ grevw(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend // check against lo & hi __ blt(x10, x12, default_case); __ bgt(x10, x13, default_case); // lookup dispatch offset __ subw(x10, x10, x12); - __ slli(t0, x10, 2); - __ add(x13, x11, t0); + __ shadd(x13, x10, x11, t0, 2); __ lwu(x13, Address(x13, 3 * BytesPerInt)); __ profile_switch_case(x10, x11, x12); // continue execution __ bind(continue_execution); - __ grevw(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend __ add(xbcp, xbcp, x13); __ load_unsigned_byte(t0, Address(xbcp)); __ dispatch_only(vtos, /*generate_poll*/true); @@ -2102,7 +2077,7 @@ 
void TemplateTable::fast_linearswitch() { transition(itos, vtos); Label loop_entry, loop, found, continue_execution; // bswap x10 so we can avoid bswapping the table entries - __ grevw(x10, x10); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend // align xbcp __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of // this instruction (change offsets @@ -2110,12 +2085,11 @@ void TemplateTable::fast_linearswitch() { __ andi(x9, x9, -BytesPerInt); // set counter __ lwu(x11, Address(x9, BytesPerInt)); - __ grev32(x11, x11); + __ revb_w(x11, x11); __ j(loop_entry); // table search __ bind(loop); - __ slli(t0, x11, 3); - __ add(t0, x9, t0); + __ shadd(t0, x11, x9, t0, 3); __ lw(t0, Address(t0, 2 * BytesPerInt)); __ beq(x10, t0, found); __ bind(loop_entry); @@ -2127,13 +2101,12 @@ void TemplateTable::fast_linearswitch() { __ j(continue_execution); // entry found -> get offset __ bind(found); - __ slli(t0, x11, 3); - __ add(t0, x9, t0); + __ shadd(t0, x11, x9, t0, 3); __ lwu(x13, Address(t0, 3 * BytesPerInt)); __ profile_switch_case(x11, x10, x9); // continue execution __ bind(continue_execution); - __ grevw(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend __ add(xbcp, xbcp, x13); __ lbu(t0, Address(xbcp, 0)); __ dispatch_only(vtos, /*generate_poll*/true); @@ -2186,7 +2159,7 @@ void TemplateTable::fast_binaryswitch() { __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) // Convert j into native byteordering - __ grev32(j, j); + __ revb_w(j, j); // And start Label entry; @@ -2202,10 +2175,9 @@ void TemplateTable::fast_binaryswitch() { // then [j = h] // else [i = h] // Convert array[h].match to native byte-ordering before compare - __ slli(temp, h, 3); - __ add(temp, array, temp); + __ shadd(temp, h, array, temp, 3); __ ld(temp, Address(temp, 0)); - __ grevw(temp, temp); // reverse bytes in word 
(32bit) and sign-extend + __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend Label L_done, L_greater; __ bge(key, temp, L_greater); @@ -2226,18 +2198,16 @@ void TemplateTable::fast_binaryswitch() { // end of binary search, result index is i (must check again!) Label default_case; // Convert array[i].match to native byte-ordering before compare - __ slli(temp, i, 3); - __ add(temp, array, temp); + __ shadd(temp, i, array, temp, 3); __ ld(temp, Address(temp, 0)); - __ grevw(temp, temp); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend __ bne(key, temp, default_case); // entry found -> j = offset - __ slli(temp, i, 3); - __ add(temp, array, temp); + __ shadd(temp, i, array, temp, 3); __ lwu(j, Address(temp, BytesPerInt)); __ profile_switch_case(i, key, array); - __ grevw(j, j); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend __ add(temp, xbcp, j); __ load_unsigned_byte(t0, Address(temp, 0)); @@ -2250,7 +2220,7 @@ void TemplateTable::fast_binaryswitch() { __ bind(default_case); __ profile_switch_default(i); __ lwu(j, Address(array, -2 * BytesPerInt)); - __ grevw(j, j); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend __ add(temp, xbcp, j); __ load_unsigned_byte(t0, Address(temp, 0)); @@ -2489,23 +2459,15 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr pop_and_check_object(obj); } - if (!UseBarriersForVolatile) { - Label notVolatile; - __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); - __ beqz(t0, notVolatile); - __ membar(MacroAssembler::AnyAny); - __ bind(notVolatile); - } - __ add(off, obj, off); const Address field(off); Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; - __ slli(flags, raw_flags, registerSize - 
(ConstantPoolCacheEntry::tos_state_shift + - ConstantPoolCacheEntry::tos_state_bits)); - __ srli(flags, flags, registerSize - ConstantPoolCacheEntry::tos_state_bits); + __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ bnez(flags, notByte); @@ -2738,9 +2700,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; - __ slli(flags, flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + - ConstantPoolCacheEntry::tos_state_bits)); - __ srli(flags, flags, registerSize - ConstantPoolCacheEntry::tos_state_bits); + __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ bnez(flags, notByte); @@ -3139,14 +3101,6 @@ void TemplateTable::fast_accessfield(TosState state) __ add(x11, x10, x11); const Address field(x11, 0); - if (!UseBarriersForVolatile) { - Label notVolatile; - __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); - __ beqz(t0, notVolatile); - __ membar(MacroAssembler::AnyAny); - __ bind(notVolatile); - } - // access field switch (bytecode()) { case Bytecodes::_fast_agetfield: @@ -3198,16 +3152,6 @@ void TemplateTable::fast_xaccess(TosState state) __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()))); - if (!UseBarriersForVolatile) { - Label notVolatile; - __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + - ConstantPoolCacheEntry::flags_offset()))); - __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); - __ beqz(t0, notVolatile); - __ membar(MacroAssembler::AnyAny); - __ 
bind(notVolatile); - } - // make sure exception is reported in correct bcp range (getfield is // next instruction) __ addi(xbcp, xbcp, 1); @@ -3247,11 +3191,6 @@ void TemplateTable::fast_xaccess(TosState state) //----------------------------------------------------------------------------- // Calls -void TemplateTable::count_calls(Register method, Register temp) -{ - __ call_Unimplemented(); -} - void TemplateTable::prepare_invoke(int byte_no, Register method, // linked method (or i-klass) Register index, // itable index, MethodType, etc. @@ -3273,8 +3212,8 @@ void TemplateTable::prepare_invoke(int byte_no, assert(recv == noreg || recv == x12, ""); // setup registers & access constant pool cache - if (recv == noreg) { - recv = x12; + if (recv == noreg) { + recv = x12; } if (flags == noreg) { flags = x13; @@ -3296,6 +3235,7 @@ void TemplateTable::prepare_invoke(int byte_no, // since the parameter_size includes it. __ push_reg(x9); __ mv(x9, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); __ load_resolved_reference_at_index(index, x9); __ pop_reg(x9); __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
@@ -3305,23 +3245,21 @@ void TemplateTable::prepare_invoke(int byte_no, // load receiver if needed (note: no return address pushed yet) if (load_receiver) { __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 - __ slli(t0, recv, 3); - __ add(t0, esp, t0); + __ shadd(t0, recv, esp, t0, 3); __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); __ verify_oop(recv); } // compute return type - __ slli(t1, flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); - __ srli(t1, t1, registerSize - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 + __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 // load return address { const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); __ mv(t0, table_addr); - __ slli(t1, t1, 3); - __ add(t0, t0, t1); - __ ld(lr, Address(t0, 0)); + __ shadd(t0, t1, t0, t1, 3); + __ ld(ra, Address(t0, 0)); } } @@ -3481,7 +3419,9 @@ void TemplateTable::invokeinterface(int byte_no) { __ profile_virtual_call(x13, x30, x9); // Get declaring interface class from method, and itable index - __ load_method_holder(x10, xmethod); + __ ld(x10, Address(xmethod, Method::const_offset())); + __ ld(x10, Address(x10, ConstMethod::constants_offset())); + __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); __ subw(xmethod, xmethod, Method::itable_index_max); __ negw(xmethod, xmethod); @@ -3668,7 +3608,7 @@ void TemplateTable::_new() { __ bnez(x13, loop); } - // initialize object hader only. + // initialize object header only. 
__ bind(initialize_header); if (UseBiasedLocking) { __ ld(t0, Address(x14, Klass::prototype_header_offset())); @@ -4045,8 +3985,7 @@ void TemplateTable::wide() { __ load_unsigned_byte(x9, at_bcp(1)); __ mv(t0, (address)Interpreter::_wentry_point); - __ slli(t1, x9, 3); - __ add(t0, t1, t0); + __ shadd(t0, x9, t0, t1, 3); __ ld(t0, Address(t0)); __ jr(t0); } @@ -4057,13 +3996,11 @@ void TemplateTable::multianewarray() { __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions // last dim is on top of stack; we want address of first one: // first_addr = last_addr + (ndims - 1) * wordSize - __ slli(c_rarg1, x10, 3); - __ add(c_rarg1, c_rarg1, esp); + __ shadd(c_rarg1, x10, esp, c_rarg1, 3); __ sub(c_rarg1, c_rarg1, wordSize); call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), c_rarg1); __ load_unsigned_byte(x11, at_bcp(3)); - __ slli(t0, x11, 3); - __ add(esp, esp, t0); + __ shadd(esp, x11, esp, t0, 3); } diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp index ca19023096f..6c89133de02 100644 --- a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp index 7d4ad0fb6ec..6bdce51506e 100644 --- a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,6 +23,7 @@ * */ +#include "precompiled.hpp" #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" #include "runtime/os.inline.hpp" @@ -42,16 +43,11 @@ void VM_Version_Ext::initialize_cpu_information(void) { return; } - int core_id = -1; - int chip_id = -1; - int len = 0; - char* src_string = NULL; - _no_of_cores = os::processor_count(); _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; - snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Riscv64"); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Riscv64 %s", _features_string); + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); _initialized = true; } diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp index 31a25e097e8..711e4aeaf68 100644 --- a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index 142e08393d2..5be0312733e 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,59 +24,14 @@ */ #include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "memory/resourceArea.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" -#include "runtime/stubCodeGenerator.hpp" #include "runtime/vm_version.hpp" #include "utilities/formatBuffer.hpp" #include "utilities/macros.hpp" #include OS_HEADER_INLINE(os) -#include -#include - -#ifndef HWCAP_ISA_I -#define HWCAP_ISA_I (1 << ('I' - 'A')) -#endif - -#ifndef HWCAP_ISA_M -#define HWCAP_ISA_M (1 << ('M' - 'A')) -#endif - -#ifndef HWCAP_ISA_A -#define HWCAP_ISA_A (1 << ('A' - 'A')) -#endif - -#ifndef HWCAP_ISA_F -#define HWCAP_ISA_F (1 << ('F' - 'A')) -#endif - -#ifndef HWCAP_ISA_D -#define HWCAP_ISA_D (1 << ('D' - 'A')) -#endif - -#ifndef HWCAP_ISA_C -#define HWCAP_ISA_C (1 << ('C' - 'A')) -#endif - -#ifndef HWCAP_ISA_V -#define HWCAP_ISA_V (1 << ('V' - 'A')) -#endif - -#define read_csr(csr) \ -({ \ - register unsigned long __v; \ - __asm__ __volatile__ ("csrr %0, %1" \ - : "=r" (__v) \ - : "i" (csr) \ - : "memory"); \ - __v; \ -}) - address VM_Version::_checkvext_fault_pc = NULL; address VM_Version::_checkvext_continuation_pc = NULL; @@ -120,55 +75,13 @@ class VM_Version_StubGenerator: public StubCodeGenerator { const char* VM_Version::_uarch = ""; uint32_t VM_Version::_initial_vector_length = 0; -uint32_t VM_Version::get_current_vector_length() { - assert(_features & CPU_V, "should not call this"); - return (uint32_t)read_csr(CSR_VLENB); -} - -void VM_Version::get_os_cpu_info() { - - uint64_t auxv = getauxval(AT_HWCAP); - - assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); - assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must 
follow Linux HWCAP"); - assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); - assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); - assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); - assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); - assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); - - // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. - // Availability for those extensions could not be queried from HWCAP. - // TODO: Add proper detection for those extensions. - _features = auxv & ( - HWCAP_ISA_I | - HWCAP_ISA_M | - HWCAP_ISA_A | - HWCAP_ISA_F | - HWCAP_ISA_D | - HWCAP_ISA_C | - HWCAP_ISA_V); - - if (FILE *f = fopen("/proc/cpuinfo", "r")) { - char buf[512], *p; - while (fgets(buf, sizeof (buf), f) != NULL) { - if ((p = strchr(buf, ':')) != NULL) { - if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { - char* uarch = os::strdup(p + 2); - uarch[strcspn(uarch, "\n")] = '\0'; - _uarch = uarch; - break; - } - } - } - fclose(f); - } -} +void VM_Version::initialize() { + get_os_cpu_info(); -void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UseFMA)) { FLAG_SET_DEFAULT(UseFMA, true); } + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); } @@ -209,11 +122,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } - if (UsePopCountInstruction) { - warning("Pop count instructions are not available on this CPU."); - FLAG_SET_DEFAULT(UsePopCountInstruction, false); - } - if (UseCRC32Intrinsics) { warning("CRC32 intrinsics are not available on this CPU."); FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); @@ -252,35 +160,80 @@ void VM_Version::get_processor_features() { } } + if (UseRVB && !(_features & CPU_B)) { + warning("RVB is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVB, false); + } + + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this 
CPU"); + FLAG_SET_DEFAULT(UseRVC, false); + } + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); } + if (UseRVB) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } + } else { + FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } + + char buf[512]; + buf[0] = '\0'; + if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); + strcat(buf, "rv64"); +#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); + CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) +#undef ADD_FEATURE_IF_SUPPORTED + + _features_string = os::strdup(buf); + #ifdef COMPILER2 - get_c2_processor_features(); + c2_initialize(); #endif // COMPILER2 + + UNSUPPORTED_OPTION(CriticalJNINatives); + + FLAG_SET_DEFAULT(UseMembar, true); } #ifdef COMPILER2 -void VM_Version::get_c2_processor_features() { - // lack of cmove in riscv64 +void VM_Version::c2_initialize() { if (UseCMoveUnconditionally) { FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); } + if (ConditionalMoveLimit > 0) { FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); } - // disable vector - if (FLAG_IS_DEFAULT(UseSuperWord)) { - FLAG_SET_DEFAULT(UseSuperWord, false); + if (!UseRVV) { + FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); } - if (FLAG_IS_DEFAULT(MaxVectorSize)) { + + if (!UseRVV && MaxVectorSize) { FLAG_SET_DEFAULT(MaxVectorSize, 0); } - if (MaxVectorSize > 0) { - warning("Vector instructions are not available on this CPU"); - FLAG_SET_DEFAULT(MaxVectorSize, 0); + + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; + } else if (MaxVectorSize < 16) { + warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); + UseRVV = false; + } else if (is_power_of_2(MaxVectorSize)) { + if (MaxVectorSize > _initial_vector_length) { + warning("Current system only supports max RVV vector length %d. 
Set MaxVectorSize to %d", + _initial_vector_length, _initial_vector_length); + } + MaxVectorSize = _initial_vector_length; + } else { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } } if (UseRVV) { @@ -292,10 +245,25 @@ void VM_Version::get_c2_processor_features() { if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); } -} -#endif // COMPILER2 -void VM_Version::initialize() { - get_processor_features(); - UNSUPPORTED_OPTION(CriticalJNINatives); + if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } } +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index e3302422fa5..85369ce0493 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -33,33 +33,39 @@ #include "utilities/sizes.hpp" class VM_Version : public Abstract_VM_Version { -public: - // Initialization - static void initialize(); +#ifdef COMPILER2 +private: + static void c2_initialize(); +#endif // COMPILER2 protected: static const char* _uarch; static uint32_t _initial_vector_length; - static void get_processor_features(); - static uint32_t get_current_vector_length(); static void get_os_cpu_info(); + static uint32_t get_current_vector_length(); + +public: + // Initialization + static void initialize(); enum Feature_Flag { -#define CPU_FEATURE_FLAGS(decl) \ - decl(I, "i", 8) \ - decl(M, "m", 12) \ - decl(A, "a", 0) \ - decl(F, "f", 5) \ - decl(D, "d", 3) \ - decl(C, "c", 2) \ - decl(V, "v", 21) +#define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ + decl(M, "m", 12) \ + decl(A, "a", 0) \ + decl(F, "f", 5) \ + decl(D, "d", 3) \ + decl(C, "c", 2) \ + decl(V, "v", 21) \ + decl(B, "b", 1) #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), - CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) + CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) #undef DECLARE_CPU_FEATURE_FLAG }; -public: + static void initialize_cpu_information(void); + static bool is_checkvext_fault(address pc) { return pc != NULL && pc == _checkvext_fault_pc; } @@ -72,6 +78,7 @@ class VM_Version : public Abstract_VM_Version { static address _checkvext_fault_pc; static address _checkvext_continuation_pc; + #ifdef COMPILER2 private: static void get_c2_processor_features(); diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp index c9152a67b81..c4338715f95 100644 --- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. 
All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,8 +28,8 @@ #include "code/vmreg.hpp" void VMRegImpl::set_regName() { - Register reg = ::as_Register(0); int i = 0; + Register reg = ::as_Register(0); for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { regName[i++] = reg->name(); @@ -46,7 +45,7 @@ void VMRegImpl::set_regName() { freg = freg->successor(); } - for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { regName[i] = "NON-GPR-FPR"; } } diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp index 8454b811cb6..6f613a8f11a 100644 --- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -35,35 +34,20 @@ inline bool is_FloatRegister() { return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; } -inline bool is_VectorRegister() { - return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -} - inline Register as_Register() { - assert( is_Register(), "must be"); + assert(is_Register(), "must be"); return ::as_Register(value() / RegisterImpl::max_slots_per_register); } inline FloatRegister as_FloatRegister() { - assert( is_FloatRegister() && is_even(value()), "must be" ); + assert(is_FloatRegister() && is_even(value()), "must be"); return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / FloatRegisterImpl::max_slots_per_register); } -inline VectorRegister as_VectorRegister() { - assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); - return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / - VectorRegisterImpl::max_slots_per_register); -} - inline bool is_concrete() { assert(is_reg(), "must be"); - if (is_VectorRegister()) { - int base = value() - ConcreteRegisterImpl::max_fpr; - return (base % VectorRegisterImpl::max_slots_per_register) == 0; - } else { - return is_even(value()); - } + return is_even(value()); } #endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp index 135d0c62a2b..06b70020b4b 100644 --- a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,19 +26,19 @@ #ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP #define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -inline VMReg RegisterImpl::as_VMReg() { - if( this == noreg ) { +inline VMReg RegisterImpl::as_VMReg() const { + if (this == noreg) { return VMRegImpl::Bad(); } return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); } -inline VMReg FloatRegisterImpl::as_VMReg() { +inline VMReg FloatRegisterImpl::as_VMReg() const { return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ConcreteRegisterImpl::max_gpr); } -inline VMReg VectorRegisterImpl::as_VMReg() { +inline VMReg VectorRegisterImpl::as_VMReg() const { return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + ConcreteRegisterImpl::max_fpr); } diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp index c95b968a167..78b81138003 100644 --- a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -254,7 +254,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { } int VtableStub::pd_code_alignment() { - // riscv64 cache line size is 64 bytes, but we want to limit alignment loss. + // RISCV cache line size is not an architected constant. We just align on word size. 
const unsigned int icache_line_size = wordSize; return icache_line_size; } diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index ec6550c2cef..2ed532fd9d7 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2019, SAP SE. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2019 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index a336d2a35c1..965ffaa604f 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index c09480abe0e..1c4887eb7e4 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -2042,8 +2042,6 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { static Elf32_Half running_arch_code=EM_PPC64; #elif (defined __powerpc__) static Elf32_Half running_arch_code=EM_PPC; -#elif (defined riscv) - static Elf32_Half running_arch_code=EM_RISCV; #elif (defined AARCH64) static Elf32_Half running_arch_code=EM_AARCH64; #elif (defined ARM) diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp index e8363fd617a..bc5d1fe24a2 100644 --- a/src/hotspot/os/posix/os_posix.cpp +++ b/src/hotspot/os/posix/os_posix.cpp @@ -117,10 +117,6 @@ void os::check_dump_limit(char* buffer, size_t bufferSize) { VMError::record_coredump_status(buffer, success); } -// Native stack isn't walkable for RISCV this way. -// Native C frame and Java frame have different structure on RISCV. -// A seperate implementation is provided under linux_riscv for RISCV. 
-#if !defined(RISCV) || defined(ZERO) int os::get_native_stack(address* stack, int frames, int toSkip) { int frame_idx = 0; int num_of_frames; // number of frames captured @@ -147,7 +143,7 @@ int os::get_native_stack(address* stack, int frames, int toSkip) { return num_of_frames; } -#endif + bool os::unsetenv(const char* name) { assert(name != NULL, "Null pointer"); diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp index 55038d243e9..f2610af6cdd 100644 --- a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp @@ -24,5 +24,3 @@ */ // nothing required here - - diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp index 50ae9121888..9b8b1a31774 100644 --- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp @@ -30,13 +30,16 @@ // Implementation of class atomic +// Note that memory_order_conservative requires a full barrier after atomic stores. +// See https://patchwork.kernel.org/patch/3575821/ + #define FULL_MEM_BARRIER __sync_synchronize() #define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); #define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); template struct Atomic::PlatformAdd - : public Atomic::AddAndFetch > + : Atomic::FetchAndAdd > { template D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp index 2bfbeb01148..28868c76406 100644 --- a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp similarity index 85% rename from src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp rename to src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp index 1e9187f6f89..bdf36d6b4c3 100644 --- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,14 +23,14 @@ * */ -#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP +#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { (void)memmove(to, from, count * HeapWordSize); } -static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { switch (count) { case 8: to[7] = from[7]; // fall through case 7: to[6] = from[6]; // fall through @@ -42,20 +42,28 @@ static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, case 1: to[0] = from[0]; // fall through case 0: break; default: - if(is_atomic) { - while (count-- > 0) { *to++ = *from++; } - } else { - memcpy(to, from, count * HeapWordSize); - } + memcpy(to, from, count * HeapWordSize); + break; } } 
-static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { - pd_disjoint_words_helper(from, to, count, false); -} - static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { - pd_disjoint_words_helper(from, to, count, true); + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } } static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { @@ -113,4 +121,4 @@ static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t _Copy_arrayof_conjoint_jlongs(from, to, count); } -#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp index 48ddf796a08..297414bfcd5 100644 --- a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -40,7 +40,4 @@ define_pd_global(uintx, JVMInvokeMethodSlack, 8192); // Used on 64 bit platforms for UseCompressedOops base address define_pd_global(uintx, HeapBaseMinAddress, 2 * G); -class Thread; -extern __thread Thread *riscv64_currentThread; - #endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp index 7477206a8b4..5b5d35553f7 100644 --- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp @@ -49,6 +49,7 @@ inline void OrderAccess::fence() { FULL_MEM_BARRIER; } + template struct OrderAccess::PlatformOrderedLoad { diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp index c76d6b48036..ea3a57e1da9 100644 --- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -108,7 +108,7 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { // frames. Currently we don't do that on Linux, so it's the same as // os::fetch_frame_from_context(). 
ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, - const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { assert(thread != NULL, "just checking"); assert(ret_sp != NULL, "just checking"); @@ -118,9 +118,9 @@ ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, } ExtendedPC os::fetch_frame_from_context(const void* ucVoid, - intptr_t** ret_sp, intptr_t** ret_fp) { + intptr_t** ret_sp, intptr_t** ret_fp) { - ExtendedPC epc; + ExtendedPC epc; const ucontext_t* uc = (const ucontext_t*)ucVoid; if (uc != NULL) { @@ -173,14 +173,14 @@ bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* // stack overflow handling return false; } else { - // In compiled code, the stack banging is performed before LR - // has been saved in the frame. LR is live, and SP and FP + // In compiled code, the stack banging is performed before RA + // has been saved in the frame. RA is live, and SP and FP // belong to the caller. - intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); - intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); - address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - - NativeInstruction::instruction_size); - *fr = frame(frame_sp, frame_fp, frame_pc); + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); + address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] + - NativeInstruction::instruction_size); + *fr = frame(sp, fp, pc); if (!fr->is_java_frame()) { assert(fr->safe_for_sender(thread), "Safety check"); assert(!fr->is_first_frame(), "Safety check"); @@ -195,14 +195,14 @@ bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* // By default, gcc always saves frame pointer rfp on this stack. This // may get turned off by -fomit-frame-pointer. 
frame os::get_sender_for_C_frame(frame* fr) { - return frame(fr->c_frame_sender_sp(), fr->c_frame_link(), fr->c_frame_sender_pc()); + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); } NOINLINE frame os::current_frame() { intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); - if(sender_sp != NULL) { + if (sender_sp != NULL) { frame myframe((intptr_t*)os::current_stack_pointer(), - sender_sp[frame::c_frame_link_offset], + sender_sp[frame::link_offset], CAST_FROM_FN_PTR(address, os::current_frame)); if (os::is_first_C_frame(&myframe)) { // stack is not walkable @@ -216,83 +216,7 @@ NOINLINE frame os::current_frame() { } } -bool os::is_first_C_frame(frame* fr) { - // Load up sp, fp, sender sp and sender fp, check for reasonable values. - // Check usp first, because if that's bad the other accessors may fault - // on some architectures. Ditto ufp second, etc. - uintptr_t fp_align_mask = (uintptr_t)(sizeof(address) - 1); - // sp on amd can be 32 bit aligned. - uintptr_t sp_align_mask = (uintptr_t)(sizeof(int) - 1); - - uintptr_t usp = (uintptr_t)fr->sp(); - if ((usp & sp_align_mask) != 0) { - return true; - } - - uintptr_t ufp = (uintptr_t)fr->fp(); - if ((ufp & fp_align_mask) != 0) { - return true; - } - - uintptr_t old_sp = (uintptr_t)fr->c_frame_sender_sp(); - if ((old_sp & sp_align_mask) != 0) { - return true; - } - if (old_sp == 0 || old_sp == (uintptr_t)-1) { - return true; - } - - uintptr_t old_fp = (uintptr_t)fr->c_frame_link(); - if ((old_fp & fp_align_mask) != 0) { - return true; - } - if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) { - return true; - } - - // stack grows downwards; if old_fp is below current fp or if the stack - // frame is too large, either the stack is corrupted or fp is not saved - // on stack (i.e. on x86, ebp may be used as general register). The stack - // is not walkable beyond current frame. 
- if (old_fp < ufp) { - return true; - } - if (old_fp - ufp > 64 * K) { - return true; - } - - return false; -} - -int os::get_native_stack(address* stack, int frames, int toSkip) { - int frame_idx = 0; - int num_of_frames = 0; // number of frames captured - frame fr = os::current_frame(); - while (fr.pc() && frame_idx < frames) { - if (toSkip > 0) { - toSkip --; - } else { - stack[frame_idx ++] = fr.pc(); - } - if (fr.fp() == NULL || fr.cb() != NULL || - fr.c_frame_sender_pc() == NULL || os::is_first_C_frame(&fr)) { - break; - } - - if (fr.c_frame_sender_pc() && !os::is_first_C_frame(&fr)) { - fr = os::get_sender_for_C_frame(&fr); - } else { - break; - } - } - num_of_frames = frame_idx; - for (; frame_idx < frames; frame_idx ++) { - stack[frame_idx] = NULL; - } - - return num_of_frames; -} - +// Utility functions extern "C" JNIEXPORT int JVM_handle_linux_signal(int sig, siginfo_t* info, @@ -327,37 +251,43 @@ JVM_handle_linux_signal(int sig, #ifdef CAN_SHOW_REGISTERS_ON_ASSERT if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { - handle_assert_poison_fault(ucVoid, info->si_addr); - return 1; + if (handle_assert_poison_fault(ucVoid, info->si_addr)) { + return 1; + } } #endif JavaThread* thread = NULL; VMThread* vmthread = NULL; if (os::Linux::signal_handlers_are_installed) { - if (t != NULL ) { + if (t != NULL ){ if(t->is_Java_thread()) { - thread = (JavaThread*)t; - } else if(t->is_VM_thread()) { + thread = (JavaThread *) t; + } + else if(t->is_VM_thread()){ vmthread = (VMThread *)t; } } } + // Handle SafeFetch faults + if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { + address const pc = (address) os::Linux::ucontext_get_pc(uc); + if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } + } + // decide if this trap can be handled by a stub address stub = NULL; - address pc = NULL; + address pc = NULL; //%note os_trap_1 if 
(info != NULL && uc != NULL && thread != NULL) { pc = (address) os::Linux::ucontext_get_pc(uc); - if (StubRoutines::is_safefetch_fault(pc)) { - os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); - return 1; - } - // Handle ALL stack overflow variations here if (sig == SIGSEGV) { address addr = (address) info->si_addr; @@ -372,12 +302,12 @@ JVM_handle_linux_signal(int sig, if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { assert(fr.is_java_frame(), "Must be a Java frame"); frame activation = - SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); + SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); if (activation.sp() != NULL) { thread->disable_stack_reserved_zone(); if (activation.is_interpreted_frame()) { thread->set_reserved_stack_activation((address)( - activation.fp() + frame::interpreter_frame_initial_sp_offset)); + activation.fp() + frame::interpreter_frame_initial_sp_offset)); } else { thread->set_reserved_stack_activation((address)activation.unextended_sp()); } @@ -410,14 +340,14 @@ JVM_handle_linux_signal(int sig, // current thread was created by user code with MAP_GROWSDOWN flag // and then attached to VM. See notes in os_linux.cpp. if (thread->osthread()->expanding_stack() == 0) { - thread->osthread()->set_expanding_stack(); - if (os::Linux::manually_expand_stack(thread, addr)) { - thread->osthread()->clear_expanding_stack(); - return 1; - } - thread->osthread()->clear_expanding_stack(); + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); } else { - fatal("recursive segv. expanding stack."); + fatal("recursive segv. 
expanding stack."); } } } @@ -455,7 +385,7 @@ JVM_handle_linux_signal(int sig, stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } else if (sig == SIGFPE && - (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { stub = SharedRuntime:: continuation_for_implicit_exception(thread, @@ -463,7 +393,7 @@ JVM_handle_linux_signal(int sig, SharedRuntime:: IMPLICIT_DIVIDE_BY_ZERO); } else if (sig == SIGSEGV && - !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } @@ -477,9 +407,9 @@ JVM_handle_linux_signal(int sig, // jni_fast_GetField can trap at certain pc's if a GC kicks in // and the heap gets shrunk before the field access. if ((sig == SIGSEGV) || (sig == SIGBUS)) { - address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); - if (addr_slow != (address)-1) { - stub = addr_slow; + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; } } @@ -497,9 +427,7 @@ JVM_handle_linux_signal(int sig, if (stub != NULL) { // save all thread context in case we need to restore it - if (thread != NULL) { - thread->set_saved_exception_pc(pc); - } + if (thread != NULL) thread->set_saved_exception_pc(pc); os::Linux::ucontext_set_pc(uc, stub); return true; @@ -507,7 +435,7 @@ JVM_handle_linux_signal(int sig, // signal-chaining if (os::Linux::chained_handler(sig, info, ucVoid)) { - return true; + return true; } if (!abort_if_unrecognized) { @@ -541,7 +469,6 @@ int os::Linux::get_fpu_control_word(void) { void os::Linux::set_fpu_control_word(int fpu_control) { } - //////////////////////////////////////////////////////////////////////////////// // thread stack @@ -586,7 +513,7 @@ void 
os::print_context(outputStream *st, const void *context) { intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); - print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 8 * sizeof(intptr_t)), sizeof(intptr_t)); + print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); st->cr(); // Note: it may be unsafe to inspect memory near pc. For example, pc may diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp index 328fb389f67..ccceed643ed 100644 --- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp @@ -24,6 +24,7 @@ */ #include "precompiled.hpp" +#include "memory/metaspaceShared.hpp" #include "runtime/frame.inline.hpp" #include "runtime/thread.inline.hpp" @@ -62,12 +63,18 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) intptr_t* ret_fp = NULL; intptr_t* ret_sp = NULL; ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, - &ret_sp, &ret_fp); + &ret_sp, &ret_fp); if (addr.pc() == NULL || ret_sp == NULL ) { // ucontext wasn't useful return false; } + if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { + // In the middle of a trampoline call. Bail out for safety. + // This happens rarely so shouldn't affect profiling. 
+ return false; + } + frame ret_frame(ret_sp, ret_fp, addr.pc()); if (!ret_frame.safe_for_sender(this)) { #ifdef COMPILER2 @@ -91,4 +98,3 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) } void JavaThread::cache_global_variables() { } - diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp index 2f3e2d591fb..4b91fa855ae 100644 --- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,9 +58,6 @@ private: bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); public: - - static Thread *riscv64_get_thread_helper(); - // These routines are only used on cpu architectures that // have separate register stacks (Itanium). static bool register_stack_overflow() { return false; } diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 index 00000000000..60260854db6 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "runtime/os.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/vm_version.hpp" + +#include +#include + +#ifndef HWCAP_ISA_I +#define HWCAP_ISA_I (1 << ('I' - 'A')) +#endif + +#ifndef HWCAP_ISA_M +#define HWCAP_ISA_M (1 << ('M' - 'A')) +#endif + +#ifndef HWCAP_ISA_A +#define HWCAP_ISA_A (1 << ('A' - 'A')) +#endif + +#ifndef HWCAP_ISA_F +#define HWCAP_ISA_F (1 << ('F' - 'A')) +#endif + +#ifndef HWCAP_ISA_D +#define HWCAP_ISA_D (1 << ('D' - 'A')) +#endif + +#ifndef HWCAP_ISA_C +#define HWCAP_ISA_C (1 << ('C' - 'A')) +#endif + +#ifndef HWCAP_ISA_V +#define HWCAP_ISA_V (1 << ('V' - 'A')) +#endif + +#ifndef HWCAP_ISA_B +#define HWCAP_ISA_B (1 << ('B' - 'A')) +#endif + +#define read_csr(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) \ + : "memory"); \ + __v; \ +}) + +uint32_t VM_Version::get_current_vector_length() { + assert(_features & CPU_V, "should not call this"); + return (uint32_t)read_csr(CSR_VLENB); +} + +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + + STATIC_ASSERT(CPU_I == HWCAP_ISA_I); + STATIC_ASSERT(CPU_M == HWCAP_ISA_M); + STATIC_ASSERT(CPU_A == HWCAP_ISA_A); + 
STATIC_ASSERT(CPU_F == HWCAP_ISA_F); + STATIC_ASSERT(CPU_D == HWCAP_ISA_D); + STATIC_ASSERT(CPU_C == HWCAP_ISA_C); + STATIC_ASSERT(CPU_V == HWCAP_ISA_V); + STATIC_ASSERT(CPU_B == HWCAP_ISA_B); + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | + HWCAP_ISA_A | + HWCAP_ISA_F | + HWCAP_ISA_D | + HWCAP_ISA_C | + HWCAP_ISA_V | + HWCAP_ISA_B); + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { + if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; + break; + } + } + } + fclose(f); + } +} diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 6362a015c3c..36f89e62a0e 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -251,8 +251,7 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) - : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _type(type) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(block->label()) , _block(block) , _ublock(NULL) @@ -260,8 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : - LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _type(type) + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(stub->entry()) , _block(NULL) , _ublock(NULL) @@ -269,8 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) - : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _type(type) + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(block->label()) , _block(block) , _ublock(ublock) diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 8daa0a7fd49..568354420be 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1585,8 +1585,8 @@ class LIR_Op2: public LIR_Op { : LIR_Op(code, LIR_OprFact::illegalOpr, info) , _opr1(opr1) , _opr2(opr2) - , _type(type) , _fpu_stack_size(0) + , _type(type) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) @@ -1601,13 +1601,13 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) + , _condition(condition) , _fpu_stack_size(0) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) - , _tmp5(LIR_OprFact::illegalOpr) - , _condition(condition) { + , _tmp5(LIR_OprFact::illegalOpr) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -1674,7 +1674,6 @@ class LIR_OpBranch: public LIR_Op2 { friend class LIR_OpVisitState; private: - BasicType _type; Label* _label; BlockBegin* _block; // if this is a branch to a block, this is the block BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -1682,8 +1681,7 @@ class LIR_OpBranch: public LIR_Op2 { public: LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) - : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) - , _type(type) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) , _label(lbl) , _block(NULL) , _ublock(NULL) @@ -1703,11 +1701,10 @@ class LIR_OpBranch: public LIR_Op2 { set_condition(cond); } - BasicType type() const { return _type; } Label* label() const { return _label; } BlockBegin* block() const { return _block; } BlockBegin* ublock() const { return _ublock; } - CodeStub* stub() const { return _stub; } + CodeStub* stub() const { return _stub; } void change_block(BlockBegin* b); void change_ublock(BlockBegin* b); @@ -1805,12 +1802,12 @@ class LIR_Op4: public LIR_Op { , _opr3(opr3) , 
_opr4(opr4) , _type(type) - , _condition(condition) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) - , _tmp5(LIR_OprFact::illegalOpr) { + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -2311,19 +2308,17 @@ class LIR_List: public CompilationResourceObj { void jump(CodeStub* stub) { append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, stub)); } - void branch(LIR_Condition cond, BasicType type, Label* lbl) { - append(new LIR_OpBranch(cond, type, lbl)); - } - // Should not be used for fp comparisons + void branch(LIR_Condition cond, BasicType type, Label* lbl) { append(new LIR_OpBranch(cond, type, lbl)); } void branch(LIR_Condition cond, BasicType type, BlockBegin* block) { + assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons"); append(new LIR_OpBranch(cond, type, block)); } - // Should not be used for fp comparisons - void branch(LIR_Condition cond, BasicType type, CodeStub* stub) { + void branch(LIR_Condition cond, BasicType type, CodeStub* stub) { + assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons"); append(new LIR_OpBranch(cond, type, stub)); } - // Should only be used for fp comparisons void branch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* unordered) { + assert(type == T_FLOAT || type == T_DOUBLE, "fp comparisons only"); append(new LIR_OpBranch(cond, type, block, unordered)); } diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 1043ca45f9e..33ed6ac041b 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -490,7 +490,6 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, } else { cmp_reg_mem(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); - // forward 
branch __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch } } @@ -499,23 +498,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index); if (index->is_constant()) { -#ifdef RISCV64 - LIR_Opr left = new_register(T_INT); - LIR_Opr right = LIR_OprFact::intConst(index->as_jint()); - __ load(generate_address(buffer, java_nio_Buffer::limit_offset(), T_INT), left, info); -#else cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); -#endif __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch } else { -#ifdef RISCV64 - LIR_Opr right = new_register(T_INT); - __ load(generate_address(buffer, java_nio_Buffer::limit_offset(), T_INT), right, info); -#else cmp_reg_mem(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), T_INT, info); -#endif - // forward branch __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch } __ move(index, result); @@ -1355,6 +1342,7 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); } + // Example: Thread.currentThread() void LIRGenerator::do_currentThread(Intrinsic* x) { assert(x->number_of_arguments() == 0, "wrong type"); @@ -1362,6 +1350,7 @@ void LIRGenerator::do_currentThread(Intrinsic* x) { __ move_wide(new LIR_Address(getThreadPointer(), in_bytes(JavaThread::threadObj_offset()), T_OBJECT), reg); } + void LIRGenerator::do_RegisterFinalizer(Intrinsic* x) { assert(x->number_of_arguments() == 1, "wrong type"); LIRItem receiver(x->argument_at(0), this); @@ -1812,11 +1801,11 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeStub* stub = new RangeCheckStub(info, index.result()); if (index.result()->is_constant()) { cmp_mem_int(lir_cond_belowEqual, buf.result(), 
java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + __ branch(lir_cond_belowEqual, T_INT, stub); } else { cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + __ branch(lir_cond_aboveEqual, T_INT, stub); } __ move(index.result(), result); } else { diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index ef5a01003eb..c1de495260b 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index 144cbe3381b..5e6bb08c50b 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -243,7 +243,7 @@ void Runtime1::generate_blob_for(BufferBlob* buffer_blob, StubID id) { case handle_exception_nofpu_id: // Unused on sparc #endif #if defined(RISCV) - // TODO: Wisp + // TODO:Wisp case monitorexit_nofpu_proxy_id: case monitorexit_proxy_id: #endif diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp index f49682741ea..6d377fa005d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ #include "utilities/defaultStream.hpp" void ShenandoahArguments::initialize() { -#if !(defined AARCH64 || defined RISCV64 || defined AMD64 || defined IA32) +#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp index c395d615c19..2bc6fecbf42 100644 --- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index 839c1f5275c..45835830843 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -338,13 +338,13 @@ class Matcher : public PhaseTransform { // Vector ideal reg static const uint vector_ideal_reg(int len); + // Does the CPU supports vector variable shift instructions? + static bool supports_vector_variable_shifts(void); + // Vector element basic type static BasicType vector_element_basic_type(const Node* n); static BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd); - // Does the CPU supports vector variable shift instructions? - static bool supports_vector_variable_shifts(void); - // CPU supports misaligned vectors store/load. 
static const bool misaligned_vectors_ok(); diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp index c64d0879592..bc856d4b617 100644 --- a/src/hotspot/share/opto/regmask.hpp +++ b/src/hotspot/share/opto/regmask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index da738e5d55c..1e8b7319bb3 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -3852,7 +3852,7 @@ void Arguments::handle_extra_cms_flags(const char* msg) { void Arguments::check_arguments_for_riscv64() { UNSUPPORTED_RISCV64_OPTS(EnableCoroutine || UseWispMonitor); } -#endif // +#endif // RISCV64 // Parse entry point called from JNI_CreateJavaVM diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp index e74160fdfe0..d19b43d19ac 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp @@ -361,7 +361,7 @@ JVMFlag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) { } int minimum_alignment = 16; -#if defined(SPARC) || (defined(X86) && !defined(AMD64)) || defined(RISCV) +#if defined(SPARC) || (defined(X86) && !defined(AMD64)) minimum_alignment = 4; #elif defined(S390) minimum_alignment = 2; diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index 93b389e6808..60ca8379e4a 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -1229,11 +1229,6 @@ bool is_pointer_bad(intptr_t* ptr) { return !is_aligned(ptr, 
sizeof(uintptr_t)) || !os::is_readable_pointer(ptr); } -// Native stack isn't walkable for RISCV this way. -// Native C frame and Java frame have different structure on RISCV. -// A seperate implementation is provided under linux_riscv for RISCV. - -#if !defined(RISCV) || defined(ZERO) // Looks like all platforms can use the same function to check if C // stack is walkable beyond current frame. The check for fp() is not // necessary on Sparc, but it's harmless. @@ -1265,7 +1260,7 @@ bool os::is_first_C_frame(frame* fr) { return false; } -#endif + // Set up the boot classpath. diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index 3ec62dd3953..46eefe3583b 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp index 9fed46f42f4..b91f31fd26e 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { set_has_async_exception(); } -#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) +#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } diff --git a/src/hotspot/share/runtime/tieredThresholdPolicy.cpp b/src/hotspot/share/runtime/tieredThresholdPolicy.cpp index c5fe1cbc19d..0e9e90a64b4 100644 --- a/src/hotspot/share/runtime/tieredThresholdPolicy.cpp +++ b/src/hotspot/share/runtime/tieredThresholdPolicy.cpp @@ -254,7 +254,7 @@ void TieredThresholdPolicy::initialize() { } #endif -#if defined SPARC || defined AARCH64 || defined RISCV64 +#if defined SPARC || defined AARCH64 if (FLAG_IS_DEFAULT(InlineSmallCode)) { FLAG_SET_DEFAULT(InlineSmallCode, 2500); } diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c index 0d834302c57..45a927fb5ee 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,10 @@ #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif +#ifdef riscv64 +#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo return (err == PS_OK)? 
array : 0; } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) +#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif +#ifdef riscv64 +#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG +#endif #if defined(ppc64) || defined(ppc64le) #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG #endif @@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(riscv64) +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg + + regs[REG_INDEX(PC)] = gregs.pc; + regs[REG_INDEX(LR)] = gregs.ra; + regs[REG_INDEX(SP)] = gregs.sp; + regs[REG_INDEX(R3)] = gregs.gp; + regs[REG_INDEX(R4)] = gregs.tp; + regs[REG_INDEX(R5)] = gregs.t0; + regs[REG_INDEX(R6)] = gregs.t1; + regs[REG_INDEX(R7)] = gregs.t2; + regs[REG_INDEX(R8)] = gregs.s0; + regs[REG_INDEX(R9)] = gregs.s1; + regs[REG_INDEX(R10)] = gregs.a0; + regs[REG_INDEX(R11)] = gregs.a1; + regs[REG_INDEX(R12)] = gregs.a2; + regs[REG_INDEX(R13)] = gregs.a3; + regs[REG_INDEX(R14)] = gregs.a4; + regs[REG_INDEX(R15)] = gregs.a5; + regs[REG_INDEX(R16)] = gregs.a6; + regs[REG_INDEX(R17)] = gregs.a7; + regs[REG_INDEX(R18)] = gregs.s2; + regs[REG_INDEX(R19)] = gregs.s3; + regs[REG_INDEX(R20)] = gregs.s4; + regs[REG_INDEX(R21)] = gregs.s5; + regs[REG_INDEX(R22)] = gregs.s6; + regs[REG_INDEX(R23)] = gregs.s7; + regs[REG_INDEX(R24)] = gregs.s8; 
+ regs[REG_INDEX(R25)] = gregs.s9; + regs[REG_INDEX(R26)] = gregs.s10; + regs[REG_INDEX(R27)] = gregs.s11; + regs[REG_INDEX(R28)] = gregs.t3; + regs[REG_INDEX(R29)] = gregs.t4; + regs[REG_INDEX(R30)] = gregs.t5; + regs[REG_INDEX(R31)] = gregs.t6; + +#endif /* riscv64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h index 9d7fda8a66b..ab092d4ee33 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c index d959c4f32a1..de5254d859e 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java index 38e4c05bc33..9bff9ee9b15 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -36,7 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; -import sun.jvm.hotspot.debugger.MachineDescriptionRiscv64; +import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; @@ -593,14 +593,14 @@ private void setupDebuggerLinux() { machDesc = new MachineDescriptionPPC64(); } else if (cpu.equals("aarch64")) { machDesc = new MachineDescriptionAArch64(); - } else if (cpu.equals("riscv64")) { - machDesc = new MachineDescriptionRiscv64(); } else if (cpu.equals("sparc")) { if (LinuxDebuggerLocal.getAddressSize()==8) { machDesc = new MachineDescriptionSPARC64Bit(); } else { machDesc = new MachineDescriptionSPARC32Bit(); } + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); } else { try { machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRiscv64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java similarity index 90% rename from src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRiscv64.java rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java index 0d10370a20d..a972516dee3 100644 --- 
a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRiscv64.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -1,5 +1,6 @@ /* * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +25,7 @@ package sun.jvm.hotspot.debugger; -public class MachineDescriptionRiscv64 extends MachineDescriptionTwosComplement implements MachineDescription { +public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { public long getAddressSize() { return 8; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java index 5e5a6bb7141..dc0bcb3da94 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; +import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; +import sun.jvm.hotspot.debugger.linux.riscv64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); - } else { + } else if (cpu.equals("riscv64")) { + RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); + if (pc == null) return null; + return new LinuxRISCV64CFrame(dbg, fp, pc); + } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 index 00000000000..f06da24bd0e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +public final class LinuxRISCV64CFrame extends BasicCFrame { + private static final int C_FRAME_LINK_OFFSET = -2; + private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; + + public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 index 00000000000..fdb841ccf3d --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { + private LinuxDebugger debugger; + + public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 index 00000000000..96d5dee47ce --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcRISCV64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. 
+ this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcRISCV64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { + return false; + } + + return (((ProcRISCV64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java similarity index 56% rename from src/hotspot/cpu/riscv/registerMap_riscv.cpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java index ec644218fea..f2aa845e665 100644 --- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Arm Limited. All rights reserved. + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. * Copyright (c) 2021, Huawei Technologies Co., Ltd. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -21,25 +21,28 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. + * */ -#include "precompiled.hpp" -#include "runtime/registerMap.hpp" -#include "vmreg_riscv.inline.hpp" +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { + private ProcDebugger debugger; + + public ProcRISCV64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } -address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { - if (base_reg->is_VectorRegister()) { - assert(base_reg->is_concrete(), "must pass base reg"); - int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / - VectorRegisterImpl::max_slots_per_register; - intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; - address base_location = location(base_reg); - if (base_location != NULL) { - return base_location + offset_in_bytes; - } else { - return NULL; + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); } - } else { - return location(base_reg->next(slot_idx)); - } } diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java similarity index 56% rename from src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java index f3077e0cff9..19f64b8ce2d 100644 --- 
a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -1,6 +1,7 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -20,22 +21,26 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. + * */ -#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; -#include +public class ProcRISCV64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; -// -// Support for building on older Linux systems -// + public ProcRISCV64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } -#ifndef SYS_memfd_create -#define SYS_memfd_create 279 -#endif -#ifndef SYS_fallocate -#define SYS_fallocate 47 -#endif + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcRISCV64Thread(debugger, threadIdentifierAddr); + } -#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP + public ThreadProxy createThreadWrapper(long id) { + return new ProcRISCV64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 index 00000000000..aecbda59023 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteRISCV64Thread extends RemoteThread { + public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); + long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 index 00000000000..1d3da6be5af --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java similarity index 55% rename from src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java index 8ecc2f06357..725b94e25a3 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -1,6 +1,7 
@@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,21 +24,23 @@ * */ -#include "precompiled.hpp" -#if INCLUDE_ZGC -#include "gc/shared/barrierSetNMethod.hpp" -#include "utilities/debug.hpp" +package sun.jvm.hotspot.debugger.remote.riscv64; -void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { - ShouldNotReachHere(); -} +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; -void BarrierSetNMethod::disarm(nmethod* nm) { - ShouldNotReachHere(); -} +public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); + } -bool BarrierSetNMethod::is_armed(nmethod* nm) { - ShouldNotReachHere(); - return false; + public ThreadProxy createThreadWrapper(long id) { + return new RemoteRISCV64Thread(debugger, id); + } } -#endif \ No newline at end of file diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java new file mode 100644 index 00000000000..fb60a70427a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates.
All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.riscv64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on riscv64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class RISCV64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. + + // /* + // * Signal context structure - contains all info to do with the state + // * before the signal handler was invoked. 
+ // */ + // struct sigcontext { + // struct user_regs_struct sc_regs; + // union __riscv_fp_state sc_fpregs; + // }; + // + // struct user_regs_struct { + // unsigned long pc; + // unsigned long ra; + // unsigned long sp; + // unsigned long gp; + // unsigned long tp; + // unsigned long t0; + // unsigned long t1; + // unsigned long t2; + // unsigned long s0; + // unsigned long s1; + // unsigned long a0; + // unsigned long a1; + // unsigned long a2; + // unsigned long a3; + // unsigned long a4; + // unsigned long a5; + // unsigned long a6; + // unsigned long a7; + // unsigned long s2; + // unsigned long s3; + // unsigned long s4; + // unsigned long s5; + // unsigned long s6; + // unsigned long s7; + // unsigned long s8; + // unsigned long s9; + // unsigned long s10; + // unsigned long s11; + // unsigned long t3; + // unsigned long t4; + // unsigned long t5; + // unsigned long t6; + // }; + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + // One instance of the Native annotation is enough to trigger header generation + // for this file. 
+ @Native + public static final int R0 = 0; + public static final int R1 = 1; + public static final int R2 = 2; + public static final int R3 = 3; + public static final int R4 = 4; + public static final int R5 = 5; + public static final int R6 = 6; + public static final int R7 = 7; + public static final int R8 = 8; + public static final int R9 = 9; + public static final int R10 = 10; + public static final int R11 = 11; + public static final int R12 = 12; + public static final int R13 = 13; + public static final int R14 = 14; + public static final int R15 = 15; + public static final int R16 = 16; + public static final int R17 = 17; + public static final int R18 = 18; + public static final int R19 = 19; + public static final int R20 = 20; + public static final int R21 = 21; + public static final int R22 = 22; + public static final int R23 = 23; + public static final int R24 = 24; + public static final int R25 = 25; + public static final int R26 = 26; + public static final int R27 = 27; + public static final int R28 = 28; + public static final int R29 = 29; + public static final int R30 = 30; + public static final int R31 = 31; + + public static final int NPRGREG = 32; + + public static final int PC = R0; + public static final int LR = R1; + public static final int SP = R2; + public static final int FP = R8; + + private long[] data; + + public RISCV64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + switch (index) { + case LR: return "lr"; + case SP: return "sp"; + case PC: return "pc"; + default: + return "r" + index; + } + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging 
system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index af711671f85..954c2e82605 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; @@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("riscv64")) { + access = new LinuxRISCV64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 index 00000000000..5c2b6e0e3ea --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_riscv64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.riscv64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new RISCV64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new RISCV64RegisterMap(thread, updateMap); + 
} + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); + RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new RISCV64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(RISCV64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. + // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 index 00000000000..34701c6922f --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.riscv64.*; + +/**

 <P> Should be able to be used on all riscv64 platforms we support + (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is a RISCV64ThreadContext; output is SP, FP, + and PC for a RISCV64Frame. Instantiation of the RISCV64Frame is + left to the caller, since we may need to subclass RISCV64Frame to + support signal handler frames on Unix platforms. </P>
 + +
 <P> Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at. </P>

*/ + +public class RISCV64CurrentFrameGuess { + private RISCV64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") + != null; + + public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. 
+ + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new RISCV64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. + if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. 
+ return false; + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + // We found a PC in the frame anchor. Check that it's plausible, and + // if it is, use it. 
+ if (vm.isJavaPCDbg(pc)) { + setValues(sp, fp, pc); + } else { + setValues(sp, fp, null); + } + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct RISCV64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 index 00000000000..e372bc5f7be --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. The *_OFFSET constants below are passed to addressOfStackSlot(). */ + +public class RISCV64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; + } + + // Java frames + private static final int LINK_OFFSET = -2; + private static final int RETURN_ADDR_OFFSET = -1; + private static final int SENDER_SP_OFFSET = 0; + + // Interpreter frames (offsets declared without an initializer are assigned in initialize()) + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_PADDING_OFFSET; + private static int INTERPRETER_FRAME_MIRROR_OFFSET; + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new
VMReg(8); + + /** Registers an observer whose update() runs initialize() with the VM's type database. */ static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + /** Assigns the remaining interpreter-frame offsets: each is one less than the previous, and both monitor-block offsets equal INTERPRETER_FRAME_INITIAL_SP_OFFSET. The db argument is not read. */ private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + /** Used only by clone(), which fills in the fields itself. */ private RISCV64Frame() { + } + + /** When pc equals the deopt handler entry of its nmethod, replaces pc with the address read at origPCOffset() from the unextended SP and sets deoptimized. */ private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized.
+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + /** Builds a frame from sp, fp and pc; the unextended SP starts equal to sp and may then be changed by adjustUnextendedSP(). */ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + /** Builds a frame without a supplied pc: the word one address-size below sp is used as pc only when isJavaPCDbg() accepts it. */ public RISCV64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + + // We cannot assume SP[-1] always contains a valid return PC (e.g. if + // the callee is a C/C++ compiled frame). If the PC is not known to + // Java then this.pc is null. + Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + if (VM.getVM().isJavaPCDbg(savedPC)) { + this.pc = savedPC; + } + + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, fp): " + this); + dumpStack(); + } + } + + /** Builds a frame with an explicitly supplied unextended SP in addition to sp, fp and pc. */ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + /** Field-by-field copy (sp, unextended SP, fp, pc, deoptimized); the constructor-time adjustments are not re-run. */ public Object clone() { + RISCV64Frame frame = new RISCV64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + /** Two frames are equal when SP, unextended SP, FP and PC all compare equal via AddressOps.equal(). */ public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof RISCV64Frame)) { + return false; + } + + RISCV64Frame other = (RISCV64Frame) arg; + + return
(AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + /** Hash of raw_sp, or 0 when raw_sp is null. */ public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + /** Renders sp, unextendedSP, fp and pc, printing "null" for any that is null. */ public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + /** Heuristic plausibility checks on FP and SP for an interpreted frame; returns false as soon as one check fails. */ public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large.
+ return false; + } + + return true; + } + + /** Computes the sender frame: entry frames and interpreted frames are handled first, then frames whose pc maps to a code blob, and finally a fallback built from getSenderSP(), getLink() and getSenderPC(). */ public Frame sender(RegisterMap regMap, CodeBlob cb) { + RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we don't have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + /** Builds the sender of an entry frame from the last-Java SP/FP (and PC when non-null) recorded in the call wrapper, then clears the map. */ private Frame senderForEntryFrame(RISCV64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + RISCV64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + +
//------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { /* empty branch: raw_unextendedSP is left unchanged in this case */ + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + /** Builds the sender of an interpreted frame: sp is the SENDER_SP_OFFSET slot address and the unextended SP is the value stored in the INTERPRETER_FRAME_SENDER_SP_OFFSET slot. */ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find.
+ + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + /** Records savedFPAddr in the map as the location of the fp register. */ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + /** Builds the sender of a frame that belongs to code blob cb: sender SP is the unextended SP plus cb's frame size; the return pc and saved FP are the words one and two address-sizes below it. */ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in RISCV64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); /* NOTE(review): the condition accepts 0 although the message says "non-zero" */ + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it.
+ updateMapWithSavedLink(map, savedFPAddr); + } + + return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + /** Distance from SP to the sender SP, divided by the VM address size. */ public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + /** Reads the value stored in the LINK_OFFSET slot; returns null if the read throws. */ public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + /** Converts the bcp stored in the BCX slot to a bci using the Method wrapper built from the method slot. */ public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this.
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + /** One address-size word below the address of interpreterFrameMonitorEnd(). */ public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + /** Wraps the address stored in the MONITOR_BLOCK_TOP slot; with asserts enabled it must satisfy SP <= result < FP. */ public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method
+ public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 when VM.getVM().isClientCompiler() is true and 3 otherwise + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + /** Debug aid: prints the four words below SP, a separator line, then the words from SP up to and including SP + 20 words. */ private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java new file mode 100644 index 00000000000..850758a7ed4 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.utilities.*; + +/** JavaCallWrapper for riscv64 that additionally exposes the _last_Java_fp field of the wrapper's JavaFrameAnchor. */ public class RISCV64JavaCallWrapper extends JavaCallWrapper { + /** Field descriptor for "_last_Java_fp" of type "JavaFrameAnchor"; assigned in initialize(). */ private static AddressField lastJavaFPField; + + /** Registers an observer whose update() runs initialize() with the VM's type database. */ static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + /** Looks up the "JavaFrameAnchor" type in db and caches its "_last_Java_fp" address field. */ private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public RISCV64JavaCallWrapper(Address addr) { + super(addr); + } + + /** Reads _last_Java_fp from the anchor located at addr plus anchorField's offset. */ public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java new
file mode 100644 index 00000000000..4aeb1c6f557 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +/** RegisterMap for riscv64; every platform-dependent (PD) hook below is a no-op. */ public class RISCV64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + /** Builds a map from an existing one by delegating to RegisterMap(RegisterMap); used by clone(). */ protected RISCV64RegisterMap(RegisterMap map) { + super(map); + } + + /** Returns a new RISCV64RegisterMap constructed from this map. */ public Object clone() { + RISCV64RegisterMap retval = new RISCV64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: the hooks below have empty bodies and getLocationPD always returns null + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java index 948eabcab12..6552ce255fc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c index da9b6f34848..d0a6f4ea846 100644 --- a/src/utils/hsdis/hsdis.c +++ b/src/utils/hsdis/hsdis.c @@ -36,6 +36,7 @@ #include #include #include + #include "hsdis.h" #ifndef bool @@ -478,9 +479,6 @@ static const char* native_arch_name() { #endif #ifdef LIBARCH_s390x res = "s390:64-bit"; -#endif -#ifdef LIBARCH_riscv64 - res = "riscv:rv64"; #endif if (res == NULL) res = "architecture not set in Makefile!"; diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index 6cd261fca7c..7385ba25693 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -296,113 +296,3 @@ dragonwell runtime/coroutine/testJniDetachThreadHoldingMonitor.sh generic-all runtime/coroutine/testJniMonitorExit.sh generic-all gc/z/unloading/TestUnloadGarbageCollectorMXBean.java linux-aarch64 - -gc/stress/TestReclaimStringsLeaksMemory.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -resourcehogs/serviceability/sa/TestHeapDumpForLargeArray.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -runtime/cds/CdsDifferentCompactObjectHeaders.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/CDSJMapClstats.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbCDSCore.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbCDSJstackPrintAll.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbFindPC.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbInspect.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJdis.java 
https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJhisto.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJstack.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJstackXcompStress.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbPrintAs.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbPstack.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbSource.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbThread.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbWhere.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/DeadlockDetectionTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/JhsdbThreadInfoTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestClhsdbJstackLock.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestHeapDumpForInvokeDynamic.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJhsdbJstackLock.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJhsdbJstackMixed.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJmapCore.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJmapCoreMetaspace.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/sadebugd/DebugdConnectTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_strict_invoke_redefine/TestDescription.java 
https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java index 7805918c28a..823b9f39dbf 100644 --- a/test/hotspot/jtreg/compiler/c2/TestBit.java +++ b/test/hotspot/jtreg/compiler/c2/TestBit.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,7 @@ * * @run driver compiler.c2.TestBit * - * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" + * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" * @requires vm.debug == true & vm.compiler2.enabled */ public class TestBit { @@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { String expectedTestBitInstruction = "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : "aarch64".equals(System.getProperty("os.arch")) ? "tb" : - "amd64".equals(System.getProperty("os.arch")) ? "test" : null; + "amd64".equals(System.getProperty("os.arch")) ? "test" : + "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null; if (expectedTestBitInstruction != null) { output.shouldContain(expectedTestBitInstruction); diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java index a2a8e93cc70..5a1b659bbe0 100644 --- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java @@ -29,7 +29,6 @@ * * @build sun.hotspot.WhiteBox * @run driver ClassFileInstaller sun.hotspot.WhiteBox - * sun.hotspot.WhiteBox$WhiteBoxPermission * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement * compiler.floatingpoint.TestLibmIntrinsics diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java index 10664da8464..55374b116e6 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -55,7 +55,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), diff --git 
a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java index 88be9ac1fca..8fb82ee4531 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -55,7 +55,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java 
b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java index a3ee9bb9f12..aca32137eda 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -55,7 +55,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java index 989345abc81..8deac4f7895 100644 --- 
a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,8 +40,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; @@ -54,7 +54,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java index e95d331a980..26635002040 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,15 +32,15 @@ /** * Generic test case for SHA-related options targeted to any CPU except - * AArch64, Riscv64, PPC, S390x, SPARC and X86. + * AArch64, RISCV64, PPC, S390x, SPARC and X86. */ public class GenericTestCaseForOtherCPU extends SHAOptionsBase.TestCase { public GenericTestCaseForOtherCPU(String optionName) { - // Execute the test case on any CPU except AArch64, Riscv64, PPC, S390x, SPARC and X86. + // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. super(optionName, new NotPredicate( new OrPredicate(Platform::isAArch64, - new OrPredicate(Platform::isRiscv64, + new OrPredicate(Platform::isRISCV64, new OrPredicate(Platform::isS390x, new OrPredicate(Platform::isSparc, new OrPredicate(Platform::isPPC, @@ -52,7 +52,7 @@ public GenericTestCaseForOtherCPU(String optionName) { protected void verifyWarnings() throws Throwable { String shouldPassMessage = String.format("JVM should start with " + "option '%s' without any warnings", optionName); - // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-Riscv64 usage of + // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of // SHA-related options will not cause any warnings. 
CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ".*" + optionName + ".*" }, shouldPassMessage, diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRiscv64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java similarity index 52% rename from test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRiscv64CPU.java rename to test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java index d64eda4d512..8566d57c391 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRiscv64CPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,15 +32,24 @@ import jdk.test.lib.cli.predicate.NotPredicate; /** - * Generic test case for SHA-related options targeted to Riscv64 CPUs + * Generic test case for SHA-related options targeted to RISCV64 CPUs * which don't support instruction required by the tested option. 
*/ -public class GenericTestCaseForUnsupportedRiscv64CPU extends +public class GenericTestCaseForUnsupportedRISCV64CPU extends SHAOptionsBase.TestCase { - public GenericTestCaseForUnsupportedRiscv64CPU(String optionName) { - super(optionName, new AndPredicate(Platform::isRiscv64, + + final private boolean checkUseSHA; + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { + this(optionName, true); + } + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, new NotPredicate(SHAOptionsBase.getPredicateForOption( optionName)))); + + this.checkUseSHA = checkUseSHA; } @Override @@ -54,22 +63,24 @@ protected void verifyWarnings() throws Throwable { SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, CommandLineOptionTest.prepareBooleanFlag(optionName, false)); - shouldPassMessage = String.format("If JVM is started with '-XX:-" - + "%s' '-XX:+%s', output should contain warning.", - SHAOptionsBase.USE_SHA_OPTION, optionName); + if (checkUseSHA) { + shouldPassMessage = String.format("If JVM is started with '-XX:-" + + "%s' '-XX:+%s', output should contain warning.", + SHAOptionsBase.USE_SHA_OPTION, optionName); - // Verify that when the tested option is enabled, then - // a warning will occur in VM output if UseSHA is disabled. - if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { - CommandLineOptionTest.verifySameJVMStartup( - new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, - null, - shouldPassMessage, - shouldPassMessage, - ExitCode.OK, - SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + // Verify that when the tested option is enabled, then + // a warning will occur in VM output if UseSHA is disabled. 
+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { + CommandLineOptionTest.verifySameJVMStartup( + new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, + null, + shouldPassMessage, + shouldPassMessage, + ExitCode.OK, + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } } } @@ -81,22 +92,24 @@ protected void verifyOptionValues() throws Throwable { optionName), SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); - // Verify that option is disabled even if it was explicitly enabled - // using CLI options. - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "Riscv64CPU even if set to true directly", optionName), - SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + if (checkUseSHA) { + // Verify that option is disabled even if it was explicitly enabled + // using CLI options. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if set to true directly", optionName), + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - // Verify that option is disabled when +UseSHA was passed to JVM. - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "Riscv64CPU even if %s flag set to JVM", - optionName, CommandLineOptionTest.prepareBooleanFlag( - SHAOptionsBase.USE_SHA_OPTION, true)), - SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag( - SHAOptionsBase.USE_SHA_OPTION, true)); + // Verify that option is disabled when +UseSHA was passed to JVM. 
+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if %s flag set to JVM", + optionName, CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true)), + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true)); + } } } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java index 2e3e2717a65..7be8af6d035 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java index 0e06a9e4327..797927b42bf 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java index c3cdbf37464..be8f7d586c2 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java index d33bd411f16..d96d5e29c00 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java index 992fa4b5161..b09c873d05d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test - * @requires os.arch=="aarch64" + * @requires os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java index 3e79b3528b7..fe40ed6f98d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2020, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test - * @requires os.arch=="aarch64" + * @requires os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java index 6603dd224ef..51631910493 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8135028 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java index d9a0c988004..d999ae423cf 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java index 722db95aed3..65912a5c7fa 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java index f58f21feb23..fffdc2f7565 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java index c5e38ba72e7..2c866f26f08 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -24,7 +24,7 @@ /* @test * @bug 8167409 - * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") + * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs */ package compiler.runtime.criticalnatives.argumentcorruption; diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java index 4437367b69a..1da369fde23 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java @@ -24,7 +24,7 @@ /* @test * @bug 8167408 - * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") + * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives 
compiler.runtime.criticalnatives.lookup.LookUp */ package compiler.runtime.criticalnatives.lookup; diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java index 284b51019cf..7afe3560f30 100644 --- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java index f7c6f11c1f1..d4d43b01ae6 100644 --- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { // It's ok for ARM not to have symbols, because it does not support NMT detail // when targeting thumb2. It's also ok for Windows not to have symbols, because // they are only available if the symbols file is included with the build. 
- if (Platform.isWindows() || Platform.isARM() || Platform.isRiscv64()) { + if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { return; // we are done } output.reportDiagnosticSummary(); diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java index 8677b97b29d..eab19273ad8 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -239,8 +239,8 @@ private static boolean isAlwaysSupportedPlatform() { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || - Platform.isX86())) || - Platform.isOSX() || Platform.isRiscv64() || + Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java b/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java index 604bfb678ac..9d80d7f0a3d 100644 --- a/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java +++ b/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java @@ -30,6 +30,7 @@ * should fail when loading. * @requires vm.cds * @requires vm.bits == 64 + * @requires os.arch != "riscv64" * @library /test/lib * @run driver CdsDifferentCompactObjectHeaders */ diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh index 7f3698c47cf..0c300d4fd96 100644 --- a/test/hotspot/jtreg/test_env.sh +++ b/test/hotspot/jtreg/test_env.sh @@ -185,11 +185,6 @@ if [ $? 
= 0 ] then VM_CPU="arm" fi -grep "riscv64" vm_version.out > ${NULL} -if [ $? = 0 ] -then - VM_CPU="riscv64" -fi grep "ppc" vm_version.out > ${NULL} if [ $? = 0 ] then diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java index 19db9f6c29d..d4bfe31dd7a 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isRiscv64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java index b23e520bcf8..cb3348a0f5b 100644 --- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java +++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java @@ -63,13 +63,13 @@ public static int run(String argv[], PrintStream ref) { try { t_a.join(); } catch (InterruptedException e) {} - checkInfo(t_a, t_a.getThreadGroup(), 1); thrinfo001b t_b = new thrinfo001b(); t_b.setPriority(Thread.MIN_PRIORITY); t_b.setDaemon(true); checkInfo(t_b, t_b.getThreadGroup(), 2); t_b.start(); + checkInfo(t_b, t_b.getThreadGroup(), 2); try { t_b.join(); } catch (InterruptedException e) {} diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt index e7a0f6b70d8..3a3f73dd0ea 100644 --- a/test/jdk/ProblemList.txt +++ b/test/jdk/ProblemList.txt @@ -892,38 +892,3 @@ 
java/net/MulticastSocket/UnreferencedMulticastSockets.java https://github.com/dr java/net/SocketOption/OptionsTest.java https://github.com/dragonwell-project/dragonwell11/issues/209 linux-riscv64 java/nio/MappedByteBuffer/Truncate.java https://github.com/dragonwell-project/dragonwell11/issues/209 linux-riscv64 - -com/alibaba/rcm/TestDeadLoopKillObjectMonitor.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestExceptionPreidicate.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestKillThreads.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRCMInheritanceCallBack.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRcmCpu.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRcmRoot.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRcmUpdate.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestStressedKillThreads.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/lang/invoke/PrivateInterfaceCall.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/lang/invoke/VarHandles/VarHandleTestAccessShort.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/net/httpclient/ShortResponseBodyGet.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/net/httpclient/ShortResponseBodyPost.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/net/httpclient/ShortResponseBodyPostWithRetry.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/util/concurrent/ConcurrentHashMap/ConcurrentContainsKeyTest.java 
https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/util/stream/test/org/openjdk/tests/java/util/stream/InfiniteStreamWithLimitOpTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/4529616/AccessibleJTableCellTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/4670319/AccessibleJTreePCESourceTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/4715503/AccessibleJTableCellBoundingRectangleTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/8283015/AccessibleJTableCellNameTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/AccessibilityProvider/basic.sh https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/JScrollPaneAccessibleRelationsTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/JTable/BooleanRendererHasAccessibleActionTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/imageio/ReadAbortTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/imageio/WriteAbortTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/AddTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/Vector128ConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/Vector256ConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/Vector64ConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/VectorMaxConversionTests.java 
https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/VectorReshapeTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/security/tools/jarsigner/Warning.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/tools/jhsdb/BasicLauncherTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/tools/jhsdb/HeapDumpTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/tools/jhsdb/JShellHeapDumpTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java index a976e1a6c19..abeff80e5e8 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "Riscv64"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "Riscv64"); + Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } } } diff --git a/test/langtools/ProblemList.txt b/test/langtools/ProblemList.txt index 0f91307930c..9ad2b728dcd 100644 --- a/test/langtools/ProblemList.txt +++ b/test/langtools/ProblemList.txt @@ -77,6 +77,3 @@ tools/sjavac/ClasspathDependencies.java 8158002 generic-all Requires i # # jdeps -# hotspot riscv -jdk/jshell/JdiHangingLaunchExecutionControlTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -tools/javac/failover/CheckAttributedTree.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java index e9875f321a5..b310873d1b6 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -202,7 +202,7 @@ public static boolean isARM() { return isArch("arm.*"); } - public static boolean isRiscv64() { + public static boolean isRISCV64() { return isArch("riscv64"); }